WIP - Respond with document info

This commit is contained in:
tigp
2018-06-23 14:59:24 +02:00
parent dbc0d96365
commit 8c33418d91

View File

@@ -18,6 +18,9 @@ import (
"github.com/digitorus/pkcs7"
"github.com/digitorus/timestamp"
"strconv"
"strings"
"golang.org/x/crypto/ocsp"
)
@@ -312,21 +315,24 @@ func Reader(file io.ReaderAt, size int64) (apiResp *Response, err error) {
return
}
// DocumentInfo contains the document information read from a PDF.
//
// The field names match the keys looked up by getDocumentInfo, which resolves
// the plain string fields by name through reflection; renaming a field
// therefore requires updating that key list as well.
type DocumentInfo struct {
	// Text entries, stored as found in the document.
	Author,
	Creator,
	Hash,
	Name,
	Permission,
	Producer,
	Subject,
	Title string

	// Pages is the numeric value of the "Pages" entry.
	Pages int
	// Keywords holds the individual keywords of the "Keywords" entry.
	Keywords []string
	// ModDate and CreationDate are the parsed PDF date entries; they stay at
	// the zero time when the entry is absent.
	ModDate,
	CreationDate time.Time
}
// getDocumentInfo parses document information
func getDocumentInfo(v pdf.Value, documentInfo *DocumentInfo) {
keys := []string{"Author", "CreationDate", "Creator", "Hash", "Keywords", "ModDate",
"Name", "Pages", "Permission", "Producer", "Subject", "Title"}
@@ -334,14 +340,67 @@ func getDocumentInfo(v pdf.Value, documentInfo *DocumentInfo) {
for _, key := range keys {
value := v.Key(key)
if !value.IsNull() {
// get string value
valueStr := value.Text()
t := reflect.ValueOf(documentInfo).Elem()
val := t.FieldByName(key)
val.Set(reflect.ValueOf(valueStr))
// get struct field
elem := reflect.ValueOf(documentInfo).Elem()
field := elem.FieldByName(key)
switch key {
// parse dates
case "CreationDate", "ModDate":
t, _ := parseDate(valueStr)
field.Set(reflect.ValueOf(t))
// parse pages
case "Pages":
i, _ := strconv.Atoi(valueStr)
documentInfo.Pages = i
case "Keywords":
documentInfo.Keywords = parseKeywords(valueStr)
default:
field.Set(reflect.ValueOf(valueStr))
}
}
}
}
// parseDate parses a PDF formatted date.
//
// PDF date format:
//
//	D:YYYYMMDDHHmmSSOHH'mm'
//
// where
//
//	YYYY is the year
//	MM   is the month (01-12)
//	DD   is the day (01-31)
//	HH   is the hour (00-23)
//	mm   is the minute (00-59)
//	SS   is the second (00-59)
//	O    is the relationship of local time to Universal Time (UT), denoted by
//	     one of the characters +, - or Z
//	HH'  is the absolute value of the offset from UT in hours (00-23)
//	mm'  is the absolute value of the offset from UT in minutes (00-59)
//
// Everything after the year may be omitted from the right, the "D:" prefix and
// the trailing apostrophe are optional, and a date without an offset is
// interpreted as UT. On failure the zero time and the error reported by
// time.Parse are returned.
func parseDate(v string) (time.Time, error) {
	const dateLayout = "20060102150405"

	s := strings.TrimPrefix(strings.TrimSpace(v), "D:")
	// The apostrophes only delimit the offset fields; without them the offset
	// has the +HHMM shape that the time package understands.
	s = strings.Replace(s, "'", "", -1)

	// Split the digits of the date from the offset designator.
	digits, zone := s, ""
	if i := strings.IndexAny(s, "+-Z"); i >= 0 {
		digits, zone = s[:i], s[i:]
	}

	// "Z" may be followed by a zero offset such as Z00'00'; it always means UT.
	if strings.HasPrefix(zone, "Z") && strings.Trim(zone[1:], "0") == "" {
		zone = "Z"
	}

	// Truncated dates keep whole fields only (YYYY, YYYYMM, ...). Any other
	// length is parsed against the full layout so time.Parse reports the error.
	layout := dateLayout
	if n := len(digits); n >= 4 && n < len(dateLayout) && n%2 == 0 {
		layout = dateLayout[:n]
	}

	switch {
	case zone == "":
		// no offset: time.Parse yields UTC
	case len(zone) == 5:
		layout += "Z0700" // sign, hours and minutes
	default:
		layout += "Z07" // "Z" or sign and hours only
	}

	return time.Parse(layout, digits+zone)
}
// parseKeywords splits the keywords entry of the PDF meta data into the
// individual keywords.
//
// Keywords are separated by commas or semicolons, optionally followed by
// white space; when the value contains neither, it is split on white space.
// https://stackoverflow.com/questions/44608608/the-separator-between-keywords-in-pdf-meta-data
//
// Surrounding white space is trimmed and empty entries are dropped. The result
// is nil when value contains no keyword at all.
func parseKeywords(value string) []string {
	var parts []string
	if strings.ContainsAny(value, ",;") {
		// An explicit list separator is present, so spaces belong to the
		// keywords themselves ("digital signature, pdf").
		parts = strings.FieldsFunc(value, func(r rune) bool {
			return r == ',' || r == ';'
		})
	} else {
		parts = strings.Fields(value)
	}

	keywords := make([]string, 0, len(parts))
	for _, p := range parts {
		if p = strings.TrimSpace(p); p != "" {
			keywords = append(keywords, p)
		}
	}
	if len(keywords) == 0 {
		return nil
	}
	return keywords
}
func walk(t pdf.Value, pad int) {
for _, k := range t.Keys() {
v := t.Key(k)