Files
pdfsign/verify/verify.go
Paul van Brouwershaven bacc810a68 Fix linting errors
2024-11-14 13:33:46 +01:00

409 lines
12 KiB
Go
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package verify
import (
"bytes"
"crypto"
"crypto/x509"
"encoding/asn1"
"fmt"
"io"
"os"
"reflect"
"strconv"
"strings"
"time"
"github.com/digitorus/pdf"
"github.com/digitorus/pdfsign/revocation"
"github.com/digitorus/pkcs7"
"github.com/digitorus/timestamp"
"golang.org/x/crypto/ocsp"
)
// Response is the result returned by File and Reader after inspecting a PDF
// document for digital signatures.
type Response struct {
// Error holds a human-readable description of a non-fatal problem found
// while verifying. Reader assigns (not appends) this field, so only the
// most recently detected problem is kept.
Error string
// DocumentInfo is the document metadata collected while walking the file.
DocumentInfo DocumentInfo
// Signers has one entry per signature dictionary whose Contents could be
// parsed as PKCS#7.
Signers []Signer
}
// Signer describes one signature found in the document together with the
// outcome of its verification.
type Signer struct {
// Name, Reason, Location and ContactInfo are read from the signature
// dictionary entries of the same name.
Name string `json:"name"`
Reason string `json:"reason"`
Location string `json:"location"`
ContactInfo string `json:"contact_info"`
// ValidSignature is true when the PKCS#7 signature verified, with or
// without a certificate chain.
ValidSignature bool `json:"valid_signature"`
// TrustedIssuer is true only when the signature verified against the pool
// built from the certificates embedded in the PKCS#7 structure.
TrustedIssuer bool `json:"trusted_issuer"`
// RevokedCertificate is true when an embedded OCSP response for one of the
// certificates reports a status other than good.
RevokedCertificate bool `json:"revoked_certificate"`
// Certificates lists every certificate embedded in the PKCS#7 structure.
Certificates []Certificate `json:"certificates"`
// TimeStamp is the parsed RFC 3161 timestamp token from the signer's
// unauthenticated attributes, if one was present.
TimeStamp *timestamp.Timestamp `json:"time_stamp"`
}
// Certificate is a certificate embedded in a signature plus the revocation
// details gathered for it.
type Certificate struct {
// Certificate is the parsed X.509 certificate.
Certificate *x509.Certificate `json:"certificate"`
// VerifyError is the error text returned when building a chain for this
// certificate failed; it is empty on success.
VerifyError string `json:"verify_error"`
// OCSPResponse is the embedded OCSP response whose serial number matches
// this certificate, if any.
OCSPResponse *ocsp.Response `json:"ocsp_response"`
// OCSPEmbedded is true when such an OCSP response was found in the
// signature.
OCSPEmbedded bool `json:"ocsp_embedded"`
// CRLRevoked and CRLEmbedded are not populated by the code in this file
// (CRL checking is still commented out in Reader).
CRLRevoked time.Time `json:"crl_revoked"`
CRLEmbedded bool `json:"crl_embedded"`
}
// DocumentInfo contains document information.
//
// Each field is filled by parseDocumentInfo from the PDF dictionary key with
// the same name as the field.
type DocumentInfo struct {
Author string `json:"author"`
Creator string `json:"creator"`
Hash string `json:"hash"`
Name string `json:"name"`
Permission string `json:"permission"`
Producer string `json:"producer"`
Subject string `json:"subject"`
Title string `json:"title"`
// Pages is the integer value of the Pages entry; it is 0 when the entry's
// text is not a number.
Pages int `json:"pages"`
// Keywords is the Keywords entry split into individual keywords.
Keywords []string `json:"keywords"`
// ModDate and CreationDate are the parsed PDF date strings; they are the
// zero time when the date could not be parsed.
ModDate time.Time `json:"mod_date"`
CreationDate time.Time `json:"creation_date"`
}
// File verifies the digital signatures of the PDF stored in file.
//
// It determines the file size with Stat, rewinds the file to its start and
// delegates the actual work to Reader. An error from Stat or Seek is returned
// as is; previously a failing Stat was ignored and caused a nil dereference.
func File(file *os.File) (apiResp *Response, err error) {
	finfo, err := file.Stat()
	if err != nil {
		return nil, err
	}

	if _, err := file.Seek(0, io.SeekStart); err != nil {
		return nil, err
	}

	return Reader(file, finfo.Size())
}
// Reader verifies the digital signatures of the PDF document that can be read
// through file and is size bytes long.
//
// Every object in the cross-reference table is inspected. Objects with the
// Adobe.PPKLite filter are treated as signature dictionaries: their Contents
// are parsed as PKCS#7, the bytes described by ByteRange are attached as the
// signed content, and the signature, an optional RFC 3161 timestamp, the
// embedded certificates and the embedded OCSP responses are checked.
//
// Non-fatal problems are reported through Response.Error (only the most
// recent message is kept). A non-nil error is returned when the file cannot be
// opened, carries no signature flag, yields no signer at all, or when a panic
// occurs while processing it.
func Reader(file io.ReaderAt, size int64) (apiResp *Response, err error) {
	var documentInfo DocumentInfo

	// Turn any panic raised while parsing or verifying into an error so
	// callers never crash on a broken document.
	defer func() {
		if r := recover(); r != nil {
			apiResp = nil
			err = fmt.Errorf("Failed to verify file (%v)", r)
		}
	}()

	apiResp = &Response{}

	rdr, err := pdf.NewReader(file, size)
	if err != nil {
		return nil, fmt.Errorf("Failed to open file: %w", err)
	}

	// AcroForm will contain a SigFlags value if the form contains a digital signature
	if rdr.Trailer().Key("Root").Key("AcroForm").Key("SigFlags").IsNull() {
		return nil, fmt.Errorf("No digital signature in document")
	}

	// 1.2.840.113549.1.9.16.2.14 - RFC 3161 id-aa-timeStampToken
	// http://www.alvestrand.no/objectid/1.2.840.113549.1.9.16.2.14.html
	timeStampTokenOID := asn1.ObjectIdentifier{1, 2, 840, 113549, 1, 9, 16, 2, 14}
	// PDF signature certificate revocation information attribute
	// http://www.alvestrand.no/objectid/1.2.840.113583.1.1.8.html
	revocationInfoOID := asn1.ObjectIdentifier{1, 2, 840, 113583, 1, 1, 8}

	// Walk over the cross references in the document
	for _, x := range rdr.Xref() {
		// Get the xref object Value
		v := rdr.Resolve(x.Ptr(), x.Ptr())

		// get document info
		parseDocumentInfo(v, &documentInfo)

		// We must have a Filter Adobe.PPKLite
		if v.Key("Filter").Name() != "Adobe.PPKLite" {
			continue
		}

		signer := Signer{
			Name:        v.Key("Name").Text(),
			Reason:      v.Key("Reason").Text(),
			Location:    v.Key("Location").Text(),
			ContactInfo: v.Key("ContactInfo").Text(),
		}

		// (Required) The signature value. When ByteRange is present, the
		// value shall be a hexadecimal string (see 7.3.4.3, "Hexadecimal
		// Strings") representing the value of the byte range digest.
		// For public-key signatures, Contents should be either a DER-encoded
		// PKCS#1 binary data object or a DER-encoded PKCS#7 binary data object.
		// Space for the Contents value must be allocated before the message
		// digest is computed. (See 7.3.4, "String Objects")
		//
		// err is deliberately scoped to this iteration so that per-signature
		// problems never leak into the function's returned error.
		p7, err := pkcs7.Parse([]byte(v.Key("Contents").RawString()))
		if err != nil {
			// Not a parsable PKCS#7 object: skip it without recording a signer.
			continue
		}

		// An array of pairs of integers (starting byte offset, length in
		// bytes) that shall describe the exact byte range for the digest
		// calculation. Multiple discontiguous byte ranges shall be used to
		// describe a digest that does not include the signature value (the
		// Contents entry) itself.
		byteRange := v.Key("ByteRange")
		for i := 0; i+1 < byteRange.Len(); i += 2 {
			offset := byteRange.Index(i).Int64()
			length := byteRange.Index(i + 1).Int64()

			// Read the byte range from the raw file and add it to the contents.
			// This content will be hashed with the corresponding algorithm to
			// verify the signature.
			content, err := io.ReadAll(io.NewSectionReader(file, offset, length))
			if err != nil {
				apiResp.Error = fmt.Sprintln("Failed to get ByteRange:", i+1, err)
			}
			p7.Content = append(p7.Content, content...)
		}

		// Look for an RFC 3161 timestamp token among the unauthenticated
		// attributes of every signer.
		for _, s := range p7.Signers {
			for _, attr := range s.UnauthenticatedAttributes {
				if !attr.Type.Equal(timeStampTokenOID) {
					continue
				}

				signer.TimeStamp, err = timestamp.Parse(attr.Value.Bytes)
				if err != nil {
					apiResp.Error = fmt.Sprintln("Failed to parse timestamp", err)
					continue
				}

				// The timestamp must cover the signature value of this signer.
				// NOTE(review): the digest is always computed with SHA-256; a
				// timestamp created with another hash algorithm would be
				// reported as mismatching - confirm whether that is intended.
				h := crypto.SHA256.New()
				h.Write(s.EncryptedDigest)
				if !bytes.Equal(h.Sum(nil), signer.TimeStamp.HashedMessage) {
					apiResp.Error = fmt.Sprintln("Hash in timestamp is different from pkcs7")
				}

				// Only the first successfully parsed timestamp is considered.
				break
			}
		}

		// Directory of certificates, including OCSP
		certPool := x509.NewCertPool()
		for _, cert := range p7.Certificates {
			certPool.AddCert(cert)
		}

		// Verify the digital signature of the pdf file. A signature that only
		// verifies without the chain is valid but not from a trusted issuer.
		if err := p7.VerifyWithChain(certPool); err == nil {
			signer.ValidSignature = true
			signer.TrustedIssuer = true
		} else if err := p7.Verify(); err == nil {
			signer.ValidSignature = true
			signer.TrustedIssuer = false
		} else {
			apiResp.Error = fmt.Sprintln("Failed to verify signature:", err)
		}

		// PDF signature certificate revocation information attribute (1.2.840.113583.1.1.8).
		// The result is ignored on purpose: when the attribute cannot be
		// unmarshalled revInfo stays empty and no embedded OCSP data is used.
		var revInfo revocation.InfoArchival
		_ = p7.UnmarshalSignedAttribute(revocationInfoOID, &revInfo)

		// Parse OCSP responses, indexed by the hex serial number they cover.
		ocspStatus := make(map[string]*ocsp.Response)
		for _, o := range revInfo.OCSP {
			resp, err := ocsp.ParseResponse(o.FullBytes, nil)
			if err != nil {
				// resp is nil here, so there is no serial number to index by;
				// record the problem and move on to the next response.
				apiResp.Error = fmt.Sprintln("Failed to parse or verify OCSP response", err)
				continue
			}
			ocspStatus[fmt.Sprintf("%x", resp.SerialNumber)] = resp
		}

		// Build certificate chains and verify revocation status
		for _, cert := range p7.Certificates {
			c := Certificate{Certificate: cert}

			// The chain is evaluated at the moment the certificate became valid.
			chain, err := cert.Verify(x509.VerifyOptions{
				Intermediates: certPool,
				CurrentTime:   cert.NotBefore,
				KeyUsages:     []x509.ExtKeyUsage{x509.ExtKeyUsageAny},
			})
			if err != nil {
				c.VerifyError = err.Error()
			}

			if resp, ok := ocspStatus[fmt.Sprintf("%x", cert.SerialNumber)]; ok {
				c.OCSPResponse = resp
				c.OCSPEmbedded = true

				if resp.Status != ocsp.Good {
					signer.RevokedCertificate = true
				}

				// The OCSP response can only be checked when a chain with an
				// issuer certificate was built.
				if len(chain) > 0 && len(chain[0]) > 1 {
					issuer := chain[0][1]
					if resp.Certificate != nil {
						// Response signed by a delegated OCSP signing certificate.
						err = resp.Certificate.CheckSignatureFrom(issuer)
						if err != nil {
							apiResp.Error = fmt.Sprintln("OCSP signing cerificate not from certificate issuer:", err)
						}
					} else {
						// CA Signed response
						err = resp.CheckSignatureFrom(issuer)
						if err != nil {
							apiResp.Error = fmt.Sprintln("Failed to verify OCSP response signature:", err)
						}
					}
				}
			}
			// TODO: check the OCSP status out of band when no response is embedded.

			// Add certificate to result
			signer.Certificates = append(signer.Certificates, c)
		}

		// TODO(vanbroup): Check revocation via CRL. Neither the certificate
		// revocation lists included in the PKCS#7 structure (p7.CRLs) nor the
		// CRLs embedded in the revocation information attribute (revInfo.CRL)
		// are evaluated yet, so they never set signer.RevokedCertificate.

		// If SubFilter is adbe.pkcs7.detached or adbe.pkcs7.sha1, this entry
		// shall not be used, and the certificate chain shall be put in the PKCS#7
		// envelope in Contents.
		// v.Key("Cert").Text()

		apiResp.Signers = append(apiResp.Signers, signer)
	}

	// SigFlags announced a signature, but no signature dictionary could be
	// parsed.
	if len(apiResp.Signers) == 0 {
		return nil, fmt.Errorf("Document looks to have a signature but got no results")
	}

	apiResp.DocumentInfo = documentInfo

	return apiResp, nil
}
// parseDocumentInfo copies the document information entries present in v into
// documentInfo.
//
// For each known key that is not null in v, the struct field with the same
// name is overwritten: dates go through parseDate, Pages through
// strconv.Atoi, Keywords through parseKeywords, and everything else is stored
// as text. Conversion errors are ignored, which leaves the zero value in the
// field.
func parseDocumentInfo(v pdf.Value, documentInfo *DocumentInfo) {
	target := reflect.ValueOf(documentInfo).Elem()

	names := [...]string{
		"Author", "CreationDate", "Creator", "Hash", "Keywords", "ModDate",
		"Name", "Pages", "Permission", "Producer", "Subject", "Title",
	}

	for _, name := range names {
		entry := v.Key(name)
		if entry.IsNull() {
			continue
		}

		text := entry.Text()
		// Field names of DocumentInfo match the PDF key names.
		field := target.FieldByName(name)

		switch name {
		case "Keywords":
			documentInfo.Keywords = parseKeywords(text)
		case "Pages":
			documentInfo.Pages, _ = strconv.Atoi(text)
		case "CreationDate", "ModDate":
			parsed, _ := parseDate(text)
			field.Set(reflect.ValueOf(parsed))
		default:
			field.Set(reflect.ValueOf(text))
		}
	}
}
// parseDate parses pdf formatted dates.
//
// PDF Date Format
// (D:YYYYMMDDHHmmSSOHH'mm')
//
// where
//
// YYYY is the year
// MM is the month
// DD is the day (01-31)
// HH is the hour (00-23)
// mm is the minute (00-59)
// SS is the second (00-59)
// O is the relationship of local time to Universal Time (UT), denoted by one of the characters +, -, or Z
// HH followed by ' is the absolute value of the offset from UT in hours (00-23)
// mm followed by ' is the absolute value of the offset from UT in minutes (00-59)
//
// Everything after the year is optional, so truncated dates such as "D:2024"
// are accepted; missing fields take their lowest value. The "D:" prefix and
// the apostrophes in the offset are optional as well, and the offset minutes
// may be omitted. A date without an offset, or with Z, is returned in UTC.
// An error is returned when v does not match this format.
func parseDate(v string) (time.Time, error) {
	s := strings.TrimPrefix(strings.TrimSpace(v), "D:")

	// Split the date/time digits from the optional UT relationship.
	stamp, zone := s, ""
	if i := strings.IndexAny(s, "+-Z"); i >= 0 {
		stamp, zone = s[:i], s[i:]
	}

	// Fields are dropped from the right, so the number of digits present
	// selects the matching prefix of the full layout.
	const fullLayout = "20060102150405"
	switch len(stamp) {
	case 4, 6, 8, 10, 12, 14:
	default:
		return time.Time{}, fmt.Errorf("invalid PDF date %q", v)
	}
	layout := fullLayout[:len(stamp)]

	if zone == "" {
		return time.Parse(layout, stamp)
	}

	// Normalise HH'mm', HH'mm and HH' to plain digits.
	offset := strings.ReplaceAll(zone[1:], "'", "")

	if zone[0] == 'Z' {
		// Z means UT; some producers still append a zero offset.
		switch offset {
		case "", "00", "0000":
			return time.Parse(layout, stamp)
		}
		return time.Time{}, fmt.Errorf("invalid PDF date %q", v)
	}

	switch len(offset) {
	case 2:
		// Only the hours are given.
		offset += "00"
	case 4:
	default:
		return time.Time{}, fmt.Errorf("invalid PDF date %q", v)
	}

	return time.Parse(layout+"-0700", stamp+zone[:1]+offset)
}
// parseKeywords parses keywords pdf meta data.
//
// Keywords are separated by commas or semicolons, optionally followed by a
// space; when neither is present they are assumed to be separated by spaces.
// https://stackoverflow.com/questions/44608608/the-separator-between-keywords-in-pdf-meta-data
//
// Surrounding whitespace is trimmed and empty entries are dropped, so mixed
// input such as "a, b;c" yields three keywords. A value without any separator
// is returned unchanged as the only element.
func parseKeywords(value string) []string {
	var fields []string
	switch {
	case strings.ContainsAny(value, ",;"):
		fields = strings.FieldsFunc(value, func(r rune) bool {
			return r == ',' || r == ';'
		})
	case strings.Contains(value, " "):
		fields = strings.Fields(value)
	default:
		return []string{value}
	}

	keywords := make([]string, 0, len(fields))
	for _, field := range fields {
		if keyword := strings.TrimSpace(field); keyword != "" {
			keywords = append(keywords, keyword)
		}
	}

	return keywords
}
// func walk(t pdf.Value, pad int) {
// for _, k := range t.Keys() {
// v := t.Key(k)
// if v.Kind() == pdf.Array || v.Kind() == pdf.Dict {
// pad++
// walk(v, pad)
// }
// }
// }