Use UTF-16BE encoding when required

Fix #2
This commit is contained in:
Paul van Brouwershaven
2023-03-10 09:07:27 +01:00
parent 7fd1d3df69
commit 4fb6fafba6
4 changed files with 115 additions and 8 deletions

View File

@@ -11,6 +11,9 @@ import (
"time"
"github.com/digitorus/pdf"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
)
func findFirstPage(parent pdf.Value) (pdf.Value, error) {
@@ -35,11 +38,28 @@ func findFirstPage(parent pdf.Value) (pdf.Value, error) {
}
// pdfString formats text as a parenthesised PDF string.
//
// Text containing any non-ASCII character is run through a UTF-16
// big-endian encoder (UseBOM policy) and wrapped in parentheses as-is.
// Pure ASCII text has backslashes, parentheses and carriage returns
// backslash-escaped before being wrapped.
//
// The function panics if the UTF-16 transform reports an error.
func pdfString(text string) string {
if !isASCII(text) {
// UTF-16BE
enc := unicode.UTF16(unicode.BigEndian, unicode.UseBOM).NewEncoder()
res, _, err := transform.String(enc, text)
if err != nil {
panic(err)
}
// NOTE(review): res is wrapped without the escaping applied on the ASCII
// path below. UTF-16 output can contain the bytes 0x28 '(', 0x29 ')',
// 0x5C '\' and 0x0D (e.g. U+0128 encodes as 0x01 0x28) — confirm whether
// these need escaping for the resulting literal string to stay well-formed.
return "(" + res + ")"
}
// UTF-8
// The commented-out lines below sketch an alternative encoding (BOM-prefixed
// UTF-8, optionally hex-encoded inside angle brackets); they are not executed.
// (\357\273\277Layer 1) % UTF-8 Layer 1 Name
// <EF BB BF DA AF DA 86 D9 BE DA 98> % UTF-8 Layer 2 Name
// text = "\357\273\277" + text
// text = hex.EncodeToString([]byte(text))
// text = "<" + text + ">"
// PDFDocEncoded
// The backslash is escaped first so that the backslashes introduced by the
// following replacements are not themselves doubled.
text = strings.Replace(text, "\\", "\\\\", -1)
text = strings.Replace(text, ")", "\\)", -1)
text = strings.Replace(text, "(", "\\(", -1)
text = strings.Replace(text, "\r", "\\r", -1)
text = "(" + text + ")"
return text
@@ -167,3 +187,12 @@ func getOIDFromHashAlgorithm(target crypto.Hash) asn1.ObjectIdentifier {
}
return nil
}
// isASCII reports whether s consists solely of 7-bit ASCII characters.
// The empty string is reported as ASCII.
func isASCII(s string) bool {
	// Any rune above U+007F, as well as any invalid UTF-8 byte, has a byte
	// with the high bit set, so inspecting raw bytes is sufficient.
	for i := 0; i < len(s); i++ {
		if s[i] > 0x7F {
			return false
		}
	}
	return true
}