Fix panic caused by PDF 1.7 files with an XRef stream (#61)

This commit is contained in:
Paul van Brouwershaven
2025-02-25 18:13:10 +01:00
parent 1fb39a3ce9
commit 681997c680
4 changed files with 159 additions and 76 deletions

View File

@@ -49,6 +49,10 @@ func (context *SignContext) writeTrailer() error {
if _, err := context.OutputBuffer.Write([]byte(trailer_string)); err != nil { if _, err := context.OutputBuffer.Write([]byte(trailer_string)); err != nil {
return err return err
} }
} else if context.PDFReader.XrefInformation.Type == "stream" {
if _, err := context.OutputBuffer.Write([]byte("startxref\n")); err != nil {
return err
}
} }
// Write the new xref start position. // Write the new xref start position.

View File

@@ -8,8 +8,6 @@ import (
"errors" "errors"
"fmt" "fmt"
"io" "io"
"strconv"
"strings"
) )
type xrefEntry struct { type xrefEntry struct {
@@ -18,10 +16,11 @@ type xrefEntry struct {
} }
const ( const (
xrefStreamColumns = 5 xrefStreamColumns = 6 // Column width (1+4+1)
xrefStreamPredictor = 12 xrefStreamPredictor = 12
pngSubPredictor = 11 defaultPredictor = 1 // No prediction (the default value)
pngUpPredictor = 12 pngSubPredictor = 11 // PNG prediction (on encoding, PNG Sub on all rows)
pngUpPredictor = 12 // PNG prediction (on encoding, PNG Up on all rows)
objectFooter = "\nendobj\n" objectFooter = "\nendobj\n"
) )
@@ -100,35 +99,23 @@ func (context *SignContext) writeXref() error {
} }
func (context *SignContext) getLastObjectIDFromXref() (uint32, error) { func (context *SignContext) getLastObjectIDFromXref() (uint32, error) {
// Seek to the start of the xref table xref := context.PDFReader.Xref()
if _, err := context.InputFile.Seek(context.PDFReader.XrefInformation.StartPos, io.SeekStart); err != nil { if len(xref) == 0 {
return 0, fmt.Errorf("failed to seek to xref table: %w", err) return 0, fmt.Errorf("no xref entries found")
} }
// Read the existing xref table // Find highest used object ID
xrefContent := make([]byte, context.PDFReader.XrefInformation.Length) var maxID uint32
if _, err := context.InputFile.Read(xrefContent); err != nil { for _, entry := range xref {
return 0, fmt.Errorf("failed to read xref table: %w", err) ptr := entry.Ptr()
// TODO: Check if in use (&& entry.offset != 0)
if ptr.GetID() > maxID {
maxID = ptr.GetID()
}
} }
// Parse the xref header return maxID + 1, nil
xrefLines := strings.Split(string(xrefContent), "\n")
xrefHeader := strings.Fields(xrefLines[1])
if len(xrefHeader) != 2 {
return 0, fmt.Errorf("invalid xref header format")
}
firstObjectID, err := strconv.ParseUint(xrefHeader[0], 10, 32)
if err != nil {
return 0, fmt.Errorf("invalid first object ID: %w", err)
}
itemCount, err := strconv.ParseUint(xrefHeader[1], 10, 32)
if err != nil {
return 0, fmt.Errorf("invalid item count: %w", err)
}
return uint32(firstObjectID + itemCount), nil
} }
// writeIncrXrefTable writes the incremental cross-reference table to the output buffer. // writeIncrXrefTable writes the incremental cross-reference table to the output buffer.
@@ -170,11 +157,14 @@ func (context *SignContext) writeIncrXrefTable() error {
// writeXrefStream writes the cross-reference stream to the output buffer. // writeXrefStream writes the cross-reference stream to the output buffer.
func (context *SignContext) writeXrefStream() error { func (context *SignContext) writeXrefStream() error {
buffer := new(bytes.Buffer) var buffer bytes.Buffer
predictor := context.PDFReader.Trailer().Key("DecodeParms").Key("Predictor").Int64() predictor := context.PDFReader.Trailer().Key("DecodeParms").Key("Predictor").Int64()
if predictor == 0 {
predictor = xrefStreamPredictor
}
if err := writeXrefStreamEntries(buffer, context); err != nil { if err := writeXrefStreamEntries(&buffer, context); err != nil {
return fmt.Errorf("failed to write xref stream entries: %w", err) return fmt.Errorf("failed to write xref stream entries: %w", err)
} }
@@ -183,19 +173,32 @@ func (context *SignContext) writeXrefStream() error {
return fmt.Errorf("failed to encode xref stream: %w", err) return fmt.Errorf("failed to encode xref stream: %w", err)
} }
if err := writeXrefStreamHeader(context, len(streamBytes)); err != nil { var xrefStreamObject bytes.Buffer
if err := writeXrefStreamHeader(&xrefStreamObject, context, len(streamBytes)); err != nil {
return fmt.Errorf("failed to write xref stream header: %w", err) return fmt.Errorf("failed to write xref stream header: %w", err)
} }
if err := writeXrefStreamContent(context, streamBytes); err != nil { if err := writeXrefStreamContent(&xrefStreamObject, streamBytes); err != nil {
return fmt.Errorf("failed to write xref stream content: %w", err) return fmt.Errorf("failed to write xref stream content: %w", err)
} }
_, err = context.addObject(xrefStreamObject.Bytes())
if err != nil {
return fmt.Errorf("failed to add xref stream object: %w", err)
}
return nil return nil
} }
// writeXrefStreamEntries writes the individual entries for the xref stream. // writeXrefStreamEntries writes the individual entries for the xref stream.
func writeXrefStreamEntries(buffer *bytes.Buffer, context *SignContext) error { func writeXrefStreamEntries(buffer *bytes.Buffer, context *SignContext) error {
// Write updated entries first
for _, entry := range context.updatedXrefEntries {
writeXrefStreamLine(buffer, 1, int(entry.Offset), 0)
}
// Write new entries
for _, entry := range context.newXrefEntries { for _, entry := range context.newXrefEntries {
writeXrefStreamLine(buffer, 1, int(entry.Offset), 0) writeXrefStreamLine(buffer, 1, int(entry.Offset), 0)
} }
@@ -205,60 +208,77 @@ func writeXrefStreamEntries(buffer *bytes.Buffer, context *SignContext) error {
// encodeXrefStream applies the appropriate encoding to the xref stream. // encodeXrefStream applies the appropriate encoding to the xref stream.
func encodeXrefStream(data []byte, predictor int64) ([]byte, error) { func encodeXrefStream(data []byte, predictor int64) ([]byte, error) {
var streamBytes []byte // Use FlateDecode without prediction for xref streams
var err error var b bytes.Buffer
w := zlib.NewWriter(&b)
switch predictor { if _, err := w.Write(data); err != nil {
case pngSubPredictor: return nil, err
streamBytes, err = EncodePNGSUBBytes(xrefStreamColumns, data)
case pngUpPredictor:
streamBytes, err = EncodePNGUPBytes(xrefStreamColumns, data)
default:
return nil, fmt.Errorf("unsupported predictor: %d", predictor)
} }
w.Close()
if err != nil { return b.Bytes(), nil
return nil, fmt.Errorf("failed to encode xref stream: %w", err)
}
return streamBytes, nil
} }
// writeXrefStreamHeader writes the header for the xref stream. // writeXrefStreamHeader writes the header for the xref stream.
func writeXrefStreamHeader(context *SignContext, streamLength int) error { func writeXrefStreamHeader(buffer *bytes.Buffer, context *SignContext, streamLength int) error {
id := context.PDFReader.Trailer().Key("ID") id := context.PDFReader.Trailer().Key("ID")
id0 := hex.EncodeToString([]byte(id.Index(0).RawString()))
id1 := hex.EncodeToString([]byte(id.Index(0).RawString()))
var buffer bytes.Buffer // Calculate total entries and create index array
buffer.WriteString(fmt.Sprintf("%d 0 obj\n", context.SignData.objectId)) totalEntries := uint32(context.PDFReader.XrefInformation.ItemCount)
var indexArray []uint32
// Add existing entries section
if len(context.updatedXrefEntries) > 0 {
for _, entry := range context.updatedXrefEntries {
indexArray = append(indexArray, entry.ID, 1)
}
}
// Add new entries section
if len(context.newXrefEntries) > 0 {
indexArray = append(indexArray, context.lastXrefID+1, uint32(len(context.newXrefEntries)))
totalEntries += uint32(len(context.newXrefEntries))
}
buffer.WriteString("<< /Type /XRef\n") buffer.WriteString("<< /Type /XRef\n")
buffer.WriteString(fmt.Sprintf(" /Length %d\n", streamLength)) buffer.WriteString(fmt.Sprintf(" /Length %d\n", streamLength))
buffer.WriteString(" /Filter /FlateDecode\n") buffer.WriteString(" /Filter /FlateDecode\n")
buffer.WriteString(fmt.Sprintf(" /DecodeParms << /Columns %d /Predictor %d >>\n", xrefStreamColumns, xrefStreamPredictor)) // Change W array to [1 4 1] to accommodate larger offsets
buffer.WriteString(" /W [ 1 3 1 ]\n") buffer.WriteString(" /W [ 1 4 1 ]\n")
buffer.WriteString(fmt.Sprintf(" /Prev %d\n", context.PDFReader.XrefInformation.StartPos)) buffer.WriteString(fmt.Sprintf(" /Prev %d\n", context.PDFReader.XrefInformation.StartPos))
buffer.WriteString(fmt.Sprintf(" /Size %d\n", context.PDFReader.XrefInformation.ItemCount+int64(len(context.newXrefEntries))+1)) buffer.WriteString(fmt.Sprintf(" /Size %d\n", totalEntries+1))
buffer.WriteString(fmt.Sprintf(" /Index [ %d 4 ]\n", context.PDFReader.XrefInformation.ItemCount))
buffer.WriteString(fmt.Sprintf(" /Root %d 0 R\n", context.CatalogData.ObjectId))
buffer.WriteString(fmt.Sprintf(" /ID [<%s><%s>]\n", id0, id1))
buffer.WriteString(">>\n")
_, err := context.OutputBuffer.Write(buffer.Bytes()) // Write index array if we have entries
return err if len(indexArray) > 0 {
buffer.WriteString(" /Index [")
for _, idx := range indexArray {
buffer.WriteString(fmt.Sprintf(" %d", idx))
}
buffer.WriteString(" ]\n")
}
buffer.WriteString(fmt.Sprintf(" /Root %d 0 R\n", context.CatalogData.ObjectId))
if !id.IsNull() {
id0 := hex.EncodeToString([]byte(id.Index(0).RawString()))
id1 := hex.EncodeToString([]byte(id.Index(1).RawString()))
buffer.WriteString(fmt.Sprintf(" /ID [<%s><%s>]\n", id0, id1))
}
buffer.WriteString(">>\n")
return nil
} }
// writeXrefStreamContent writes the content of the xref stream. // writeXrefStreamContent writes the content of the xref stream.
func writeXrefStreamContent(context *SignContext, streamBytes []byte) error { func writeXrefStreamContent(buffer *bytes.Buffer, streamBytes []byte) error {
if _, err := io.WriteString(context.OutputBuffer, "stream\n"); err != nil { if _, err := io.WriteString(buffer, "stream\n"); err != nil {
return err return err
} }
if _, err := context.OutputBuffer.Write(streamBytes); err != nil { if _, err := buffer.Write(streamBytes); err != nil {
return err return err
} }
if _, err := io.WriteString(context.OutputBuffer, "\nendstream\n"); err != nil { if _, err := io.WriteString(buffer, "\nendstream\n"); err != nil {
return err return err
} }
@@ -267,16 +287,16 @@ func writeXrefStreamContent(context *SignContext, streamBytes []byte) error {
// writeXrefStreamLine writes a single line in the xref stream. // writeXrefStreamLine writes a single line in the xref stream.
func writeXrefStreamLine(b *bytes.Buffer, xreftype byte, offset int, gen byte) { func writeXrefStreamLine(b *bytes.Buffer, xreftype byte, offset int, gen byte) {
// Write type (1 byte)
b.WriteByte(xreftype) b.WriteByte(xreftype)
b.Write(encodeInt(offset))
b.WriteByte(gen)
}
// encodeInt encodes an integer to a 3-byte slice. // Write offset (4 bytes)
func encodeInt(i int) []byte { offsetBytes := make([]byte, 4)
result := make([]byte, 4) binary.BigEndian.PutUint32(offsetBytes, uint32(offset))
binary.BigEndian.PutUint32(result, uint32(i)) b.Write(offsetBytes)
return result[1:4]
// Write generation (1 byte)
b.WriteByte(gen)
} }
// EncodePNGSUBBytes encodes data using PNG SUB filter. // EncodePNGSUBBytes encodes data using PNG SUB filter.

59
sign/pdfxref_test.go Normal file
View File

@@ -0,0 +1,59 @@
package sign
import (
"os"
"testing"
"github.com/digitorus/pdf"
)
// TestGetLastObjectIDFromXref opens each listed PDF under ../testfiles, parses
// it with pdf.NewReader, and checks that getLastObjectIDFromXref returns the
// value recorded for that file in the fixture table.
func TestGetLastObjectIDFromXref(t *testing.T) {
	type fixture struct {
		fileName string
		expected uint32
	}

	fixtures := []fixture{
		{fileName: "minimal.pdf", expected: 5},
		{fileName: "testfile12.pdf", expected: 16},
		{fileName: "testfile14.pdf", expected: 15},
		{fileName: "testfile16.pdf", expected: 567},
		{fileName: "testfile17.pdf", expected: 20},
		{fileName: "testfile20.pdf", expected: 10},
		{fileName: "testfile21.pdf", expected: 16},
		{fileName: "small.pdf", expected: 7},
	}

	for i := range fixtures {
		fx := fixtures[i]

		// One subtest per file, named after the file.
		t.Run(fx.fileName, func(st *testing.T) {
			//st.Parallel()

			f, err := os.Open("../testfiles/" + fx.fileName)
			if err != nil {
				st.Fatalf("%s: %s", fx.fileName, err.Error())
			}
			defer f.Close()

			// pdf.NewReader is given the file size along with the file handle.
			info, err := f.Stat()
			if err != nil {
				st.Fatalf("%s: %s", fx.fileName, err.Error())
			}

			reader, err := pdf.NewReader(f, info.Size())
			if err != nil {
				st.Fatalf("%s: %s", fx.fileName, err.Error())
			}

			// Only the fields set here are populated on the context.
			signCtx := &SignContext{
				InputFile: f,
				PDFReader: reader,
			}

			got, err := signCtx.getLastObjectIDFromXref()
			if err != nil {
				st.Fatalf("%s: %s", fx.fileName, err.Error())
			}

			if got != fx.expected {
				st.Fatalf("%s: expected object id %d, got %d", fx.fileName, fx.expected, got)
			}
		})
	}
}

BIN
testfiles/testfile17.pdf Normal file

Binary file not shown.