WIP - Respond with document info
This commit is contained in:
@@ -18,6 +18,9 @@ import (
|
||||
"github.com/digitorus/pkcs7"
|
||||
"github.com/digitorus/timestamp"
|
||||
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/crypto/ocsp"
|
||||
)
|
||||
|
||||
@@ -312,21 +315,24 @@ func Reader(file io.ReaderAt, size int64) (apiResp *Response, err error) {
|
||||
return
|
||||
}
|
||||
|
||||
// DocumentInfo contains document information
type DocumentInfo struct {
	// String-valued entries; each is filled with the text of the
	// document info key that has the same name as the field.
	Author,
	Creator,
	Hash,
	Name,
	Permission,
	Producer,
	Subject,
	Title string

	// Pages is the "Pages" entry converted from its decimal text form.
	Pages int
	// Keywords is the "Keywords" entry split into individual keywords.
	Keywords []string
	// ModDate and CreationDate are the corresponding entries parsed from
	// PDF date strings; they stay at the zero time when parsing fails.
	ModDate,
	CreationDate time.Time
}
|
||||
|
||||
// getDocumentInfo parses document information
|
||||
func getDocumentInfo(v pdf.Value, documentInfo *DocumentInfo) {
|
||||
keys := []string{"Author", "CreationDate", "Creator", "Hash", "Keywords", "ModDate",
|
||||
"Name", "Pages", "Permission", "Producer", "Subject", "Title"}
|
||||
@@ -334,14 +340,67 @@ func getDocumentInfo(v pdf.Value, documentInfo *DocumentInfo) {
|
||||
for _, key := range keys {
|
||||
value := v.Key(key)
|
||||
if !value.IsNull() {
|
||||
// get string value
|
||||
valueStr := value.Text()
|
||||
t := reflect.ValueOf(documentInfo).Elem()
|
||||
val := t.FieldByName(key)
|
||||
val.Set(reflect.ValueOf(valueStr))
|
||||
|
||||
// get struct field
|
||||
elem := reflect.ValueOf(documentInfo).Elem()
|
||||
field := elem.FieldByName(key)
|
||||
|
||||
switch key {
|
||||
// parse dates
|
||||
case "CreationDate", "ModDate":
|
||||
t, _ := parseDate(valueStr)
|
||||
field.Set(reflect.ValueOf(t))
|
||||
// parse pages
|
||||
case "Pages":
|
||||
i, _ := strconv.Atoi(valueStr)
|
||||
documentInfo.Pages = i
|
||||
case "Keywords":
|
||||
documentInfo.Keywords = parseKeywords(valueStr)
|
||||
default:
|
||||
field.Set(reflect.ValueOf(valueStr))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// parseDate parses pdf formatted dates
//
// The PDF date format is (D:YYYYMMDDHHmmSSOHH'mm') where
//
//	YYYY is the year
//	MM   is the month (01-12)
//	DD   is the day (01-31)
//	HH   is the hour (00-23)
//	mm   is the minute (00-59)
//	SS   is the second (00-59)
//	O    is the relationship of local time to Universal Time (UT),
//	     denoted by one of the characters +, -, or Z
//	HH'  is the absolute value of the offset from UT in hours (00-23)
//	mm'  is the absolute value of the offset from UT in minutes (00-59)
//
// Everything after the year is optional, so truncated values such as
// "D:2019" or "D:20190102" are accepted; missing month/day default to 01 and
// missing time fields to 0. The "D:" prefix, the zone, and the apostrophes in
// the zone are optional as well. A value without a zone is interpreted as UTC.
//
// On failure the zero time and a non-nil error are returned.
func parseDate(v string) (time.Time, error) {
	// Go reference time spelled as YYYYMMDDHHmmSS. A prefix of this string is
	// the layout for a date truncated at the same position.
	const dateLayout = "20060102150405"

	// invalid builds the error for values that cannot be a PDF date at all,
	// using the time package's own error type so callers see a uniform shape.
	invalid := func() (time.Time, error) {
		return time.Time{}, &time.ParseError{
			Layout:  "D:" + dateLayout + "Z07'00'",
			Value:   v,
			Message: ": not a valid PDF date",
		}
	}

	s := strings.TrimPrefix(strings.TrimSpace(v), "D:")

	// Count the leading date/time digits (at most 14).
	n := 0
	for n < len(s) && n < len(dateLayout) && s[n] >= '0' && s[n] <= '9' {
		n++
	}
	// The year is mandatory and every further field is exactly two digits.
	if n < 4 || n%2 != 0 {
		return invalid()
	}

	layout := dateLayout[:n]

	// Whatever follows the digits is the zone. Dropping the apostrophes turns
	// +HH'mm' (and the variant without the trailing apostrophe) into +HHmm,
	// which the time package can parse directly.
	zone := strings.Replace(s[n:], "'", "", -1)
	switch {
	case zone == "":
		// No zone given: time.Parse yields UTC.
	case zone[0] == 'Z':
		// "Z" may be followed by an all-zero offset such as 00'00'.
		if strings.Trim(zone[1:], "0") != "" {
			return invalid()
		}
		zone = ""
	case len(zone) == 3: // +HH
		layout += "-07"
	case len(zone) == 5: // +HHmm
		layout += "-0700"
	default:
		return invalid()
	}

	return time.Parse(layout, s[:n]+zone)
}
|
||||
|
||||
// parseKeywords parses keywords pdf meta data
//
// Keywords are separated by commas or semicolons, optionally followed by
// whitespace; when neither character is present the value is treated as a
// whitespace separated list.
// https://stackoverflow.com/questions/44608608/the-separator-between-keywords-in-pdf-meta-data
//
// Each keyword is trimmed and empty entries are dropped, so an empty or
// separator-only value yields an empty (non-nil) slice.
func parseKeywords(value string) []string {
	var parts []string
	if strings.ContainsAny(value, ",;") {
		// Split on every comma/semicolon rather than on the first separator
		// style found, so mixed input like "a,b; c" is handled. Spaces inside
		// an entry are preserved ("machine learning, pdf").
		parts = strings.FieldsFunc(value, func(r rune) bool {
			return r == ',' || r == ';'
		})
	} else {
		parts = strings.Fields(value)
	}

	keywords := make([]string, 0, len(parts))
	for _, p := range parts {
		if p = strings.TrimSpace(p); p != "" {
			keywords = append(keywords, p)
		}
	}

	return keywords
}
|
||||
|
||||
func walk(t pdf.Value, pad int) {
|
||||
for _, k := range t.Keys() {
|
||||
v := t.Key(k)
|
||||
|
Reference in New Issue
Block a user