WIP - Respond with document info
This commit is contained in:
@@ -18,6 +18,9 @@ import (
|
|||||||
"github.com/digitorus/pkcs7"
|
"github.com/digitorus/pkcs7"
|
||||||
"github.com/digitorus/timestamp"
|
"github.com/digitorus/timestamp"
|
||||||
|
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"golang.org/x/crypto/ocsp"
|
"golang.org/x/crypto/ocsp"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -312,21 +315,24 @@ func Reader(file io.ReaderAt, size int64) (apiResp *Response, err error) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DocumentInfo contains document information
|
||||||
type DocumentInfo struct {
|
type DocumentInfo struct {
|
||||||
Author,
|
Author,
|
||||||
CreationDate,
|
|
||||||
Creator,
|
Creator,
|
||||||
Hash,
|
Hash,
|
||||||
Keywords,
|
|
||||||
ModDate,
|
|
||||||
Name,
|
Name,
|
||||||
Pages,
|
|
||||||
Permission,
|
Permission,
|
||||||
Producer,
|
Producer,
|
||||||
Subject,
|
Subject,
|
||||||
Title string
|
Title string
|
||||||
|
|
||||||
|
Pages int
|
||||||
|
Keywords []string
|
||||||
|
ModDate,
|
||||||
|
CreationDate time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getDocumentInfo parses document information
|
||||||
func getDocumentInfo(v pdf.Value, documentInfo *DocumentInfo) {
|
func getDocumentInfo(v pdf.Value, documentInfo *DocumentInfo) {
|
||||||
keys := []string{"Author", "CreationDate", "Creator", "Hash", "Keywords", "ModDate",
|
keys := []string{"Author", "CreationDate", "Creator", "Hash", "Keywords", "ModDate",
|
||||||
"Name", "Pages", "Permission", "Producer", "Subject", "Title"}
|
"Name", "Pages", "Permission", "Producer", "Subject", "Title"}
|
||||||
@@ -334,14 +340,67 @@ func getDocumentInfo(v pdf.Value, documentInfo *DocumentInfo) {
|
|||||||
for _, key := range keys {
|
for _, key := range keys {
|
||||||
value := v.Key(key)
|
value := v.Key(key)
|
||||||
if !value.IsNull() {
|
if !value.IsNull() {
|
||||||
|
// get string value
|
||||||
valueStr := value.Text()
|
valueStr := value.Text()
|
||||||
t := reflect.ValueOf(documentInfo).Elem()
|
|
||||||
val := t.FieldByName(key)
|
// get struct field
|
||||||
val.Set(reflect.ValueOf(valueStr))
|
elem := reflect.ValueOf(documentInfo).Elem()
|
||||||
|
field := elem.FieldByName(key)
|
||||||
|
|
||||||
|
switch key {
|
||||||
|
// parse dates
|
||||||
|
case "CreationDate", "ModDate":
|
||||||
|
t, _ := parseDate(valueStr)
|
||||||
|
field.Set(reflect.ValueOf(t))
|
||||||
|
// parse pages
|
||||||
|
case "Pages":
|
||||||
|
i, _ := strconv.Atoi(valueStr)
|
||||||
|
documentInfo.Pages = i
|
||||||
|
case "Keywords":
|
||||||
|
documentInfo.Keywords = parseKeywords(valueStr)
|
||||||
|
default:
|
||||||
|
field.Set(reflect.ValueOf(valueStr))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parseDate parses a PDF formatted date string into a time.Time.
//
// PDF dates have the form
//
//	D:YYYYMMDDHHmmSSOHH'mm'
//
// where
//
//	YYYY is the year
//	MM   is the month (01-12, default 01)
//	DD   is the day (01-31, default 01)
//	HH   is the hour (00-23, default 00)
//	mm   is the minute (00-59, default 00)
//	SS   is the second (00-59, default 00)
//	O    is the relationship of local time to Universal Time (UT),
//	     denoted by one of the characters +, - or Z
//	HH'  is the absolute value of the offset from UT in hours (00-23)
//	mm'  is the absolute value of the offset from UT in minutes (00-59)
//
// The "D:" prefix, every field after the year, and the apostrophes in the
// offset are optional. A date without any UT relationship is interpreted as
// UTC. The input is normalized to a fixed-width form and handed to
// time.Parse, so a malformed date yields the *time.ParseError produced for
// the normalized value.
func parseDate(v string) (time.Time, error) {
	const (
		// layout is the fixed-width form every accepted date is normalized to.
		layout = "20060102150405Z0700"
		// defaults supplies the value of every field that may be omitted;
		// the first four characters (the year) are never used.
		defaults = "00000101000000"
	)

	s := strings.TrimPrefix(strings.TrimSpace(v), "D:")

	// Split the leading date/time digits from the trailing UT offset.
	n := 0
	for n < len(s) && s[n] >= '0' && s[n] <= '9' {
		n++
	}
	stamp := s[:n]
	zone := strings.ReplaceAll(s[n:], "'", "")

	// Fill in omitted trailing fields. Anything that is not a whole number of
	// fields is left untouched so that time.Parse rejects it below.
	if n >= 4 && n < len(defaults) && n%2 == 0 {
		stamp += defaults[n:]
	}

	switch {
	case zone == "":
		// No UT relationship given: treat the timestamp as UTC.
		zone = "Z"
	case zone[0] == 'Z' && strings.Trim(zone[1:], "0") == "":
		// "Z" may be followed by a redundant zero offset (Z00'00').
		zone = "Z"
	case len(zone) == 3:
		// Hour-only offset (+HH): add the missing minutes.
		zone += "00"
	}

	return time.Parse(layout, stamp+zone)
}
|
||||||
|
|
||||||
|
// parseKeywords splits the Keywords entry of the PDF document information
// into individual keywords.
//
// There is no single mandated separator, so the following convention is
// applied: if the value contains a comma or a semicolon, those characters are
// the separators (and keywords may themselves contain spaces); otherwise the
// value is split on whitespace. Surrounding whitespace is removed from every
// keyword and empty keywords are dropped, so an empty value yields an empty
// (non-nil) slice.
//
// See https://stackoverflow.com/questions/44608608/the-separator-between-keywords-in-pdf-meta-data
func parseKeywords(value string) []string {
	var parts []string
	if strings.ContainsAny(value, ",;") {
		// Mixed separators ("a,b; c") are handled in a single pass.
		parts = strings.FieldsFunc(value, func(r rune) bool {
			return r == ',' || r == ';'
		})
	} else {
		parts = strings.Fields(value)
	}

	keywords := make([]string, 0, len(parts))
	for _, p := range parts {
		if p = strings.TrimSpace(p); p != "" {
			keywords = append(keywords, p)
		}
	}

	return keywords
}
|
||||||
|
|
||||||
func walk(t pdf.Value, pad int) {
|
func walk(t pdf.Value, pad int) {
|
||||||
for _, k := range t.Keys() {
|
for _, k := range t.Keys() {
|
||||||
v := t.Key(k)
|
v := t.Key(k)
|
||||||
|
Reference in New Issue
Block a user