Merge pull request #249 from gunnsth/release/v3.4.0

Prepare unipdf release v3.4.0
unidoc · Feb 10, 2020 · 11e21cd · 11e21cd
2 parents be2b7f4 + 42c9cd2
commit 11e21cd
Show file tree

Hide file tree

Showing 25 changed files with 5,421 additions and 539 deletions.
diff --git a/annotator/field_appearance.go b/annotator/field_appearance.go
@@ -14,6 +14,7 @@ import (
 	"github.com/unidoc/unipdf/v3/common"
 	"github.com/unidoc/unipdf/v3/contentstream"
 	"github.com/unidoc/unipdf/v3/core"
+	"github.com/unidoc/unipdf/v3/internal/textencoding"
 	"github.com/unidoc/unipdf/v3/model"
 )
 
@@ -267,6 +268,11 @@ func genFieldTextAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFieldT
 		resources.SetFontByName(*fontname, fontobj)
 	}
 	encoder := font.Encoder()
+	if encoder == nil {
+		common.Log.Debug("WARN: font encoder is nil. Assuming identity encoder. Output may be incorrect.")
+		encoder = textencoding.NewIdentityTextEncoder("Identity-H")
+	}
+
 	fdescriptor, err := font.GetFontDescriptor()
 	if err != nil {
 		common.Log.Debug("Error: Unable to get font descriptor")

diff --git a/common/version.go b/common/version.go
@@ -11,12 +11,12 @@ import (
 )
 
 const releaseYear = 2020
-const releaseMonth = 1
-const releaseDay = 4
-const releaseHour = 19
-const releaseMin = 5
+const releaseMonth = 2
+const releaseDay = 10
+const releaseHour = 8
+const releaseMin = 50
 
 // Version holds version information, when bumping this make sure to bump the released at stamp also.
-const Version = "3.3.1"
+const Version = "3.4.0"
 
 var ReleasedAt = time.Date(releaseYear, releaseMonth, releaseDay, releaseHour, releaseMin, 0, 0, time.UTC)
diff --git a/contentstream/inline-image.go b/contentstream/inline-image.go
@@ -361,6 +361,8 @@ func (csp *ContentStreamParser) ParseInlineImage() (*ContentStreamInlineImage, e
 				im.Interpolate = valueObj
 			case "W", "Width":
 				im.Width = valueObj
+			case "Length", "Subtype", "Type":
+				common.Log.Debug("Ignoring inline parameter %s", *param)
 			default:
 				return nil, fmt.Errorf("unknown inline image parameter %s", *param)
 			}

diff --git a/contentstream/parser.go b/contentstream/parser.go
@@ -191,67 +191,7 @@ func (csp *ContentStreamParser) parseName() (core.PdfObjectName, error) {
 // we will support it in the reader (no confusion with other types, so
 // no compromise).
 func (csp *ContentStreamParser) parseNumber() (core.PdfObject, error) {
-	isFloat := false
-	allowSigns := true
-	numStr := ""
-	for {
-		common.Log.Trace("Parsing number \"%s\"", numStr)
-		bb, err := csp.reader.Peek(1)
-		if err == io.EOF {
-			// GH: EOF handling.  Handle EOF like end of line.  Can happen with
-			// encoded object streams that the object is at the end.
-			// In other cases, we will get the EOF error elsewhere at any rate.
-			break // Handle like EOF
-		}
-		if err != nil {
-			common.Log.Error("ERROR %s", err)
-			return nil, err
-		}
-		if allowSigns && (bb[0] == '-' || bb[0] == '+') {
-			// Only appear in the beginning, otherwise serves as a delimiter.
-			b, _ := csp.reader.ReadByte()
-			numStr += string(b)
-			allowSigns = false // Only allowed in beginning, and after e (exponential).
-		} else if core.IsDecimalDigit(bb[0]) {
-			b, _ := csp.reader.ReadByte()
-			numStr += string(b)
-		} else if bb[0] == '.' {
-			b, _ := csp.reader.ReadByte()
-			numStr += string(b)
-			isFloat = true
-		} else if bb[0] == 'e' {
-			// Exponential number format.
-			b, _ := csp.reader.ReadByte()
-			numStr += string(b)
-			isFloat = true
-			allowSigns = true
-		} else {
-			break
-		}
-	}
-
-	var o core.PdfObject
-	if isFloat {
-		fVal, err := strconv.ParseFloat(numStr, 64)
-		if err != nil {
-			common.Log.Debug("Error parsing number %q err=%v. Using 0.0. Output may be incorrect", numStr, err)
-			fVal = 0.0
-		}
-
-		objFloat := core.PdfObjectFloat(fVal)
-		o = &objFloat
-	} else {
-		intVal, err := strconv.ParseInt(numStr, 10, 64)
-		if err != nil {
-			common.Log.Debug("Error parsing integer %q err=%v. Using 0. Output may be incorrect", numStr, err)
-			intVal = 0
-		}
-
-		objInt := core.PdfObjectInteger(intVal)
-		o = &objInt
-	}
-
-	return o, nil
+	return core.ParseNumber(csp.reader)
 }
 
 // A string starts with '(' and ends with ')'.

diff --git a/core/crypt.go b/core/crypt.go
@@ -218,8 +218,8 @@ func decodeEncryptStd(d *security.StdEncryptDict, ed *PdfObjectDictionary) error
 func decodeCryptFilter(cf *crypto.FilterDict, d *PdfObjectDictionary) error {
 	// If Type present, should be CryptFilter.
 	if typename, ok := d.Get("Type").(*PdfObjectName); ok {
-		if string(*typename) != "CryptFilter" {
-			return fmt.Errorf("CF dict type != CryptFilter (%s)", typename)
+		if cfType := string(*typename); cfType != "CryptFilter" {
+			common.Log.Debug("Invalid CF dict type: %s (should be CryptFilter)", cfType)
 		}
 	}
 

diff --git a/core/parser.go b/core/parser.go
@@ -286,69 +286,7 @@ func (parser *PdfParser) parseName() (PdfObjectName, error) {
 // we will support it in the reader (no confusion with other types, so
 // no compromise).
 func (parser *PdfParser) parseNumber() (PdfObject, error) {
-	isFloat := false
-	allowSigns := true
-	var r bytes.Buffer
-	for {
-		common.Log.Trace("Parsing number \"%s\"", r.String())
-		bb, err := parser.reader.Peek(1)
-		if err == io.EOF {
-			// GH: EOF handling.  Handle EOF like end of line.  Can happen with
-			// encoded object streams that the object is at the end.
-			// In other cases, we will get the EOF error elsewhere at any rate.
-			break // Handle like EOF
-		}
-		if err != nil {
-			common.Log.Debug("ERROR %s", err)
-			return nil, err
-		}
-		if allowSigns && (bb[0] == '-' || bb[0] == '+') {
-			// Only appear in the beginning, otherwise serves as a delimiter.
-			b, _ := parser.reader.ReadByte()
-			r.WriteByte(b)
-			allowSigns = false // Only allowed in beginning, and after e (exponential).
-		} else if IsDecimalDigit(bb[0]) {
-			b, _ := parser.reader.ReadByte()
-			r.WriteByte(b)
-		} else if bb[0] == '.' {
-			b, _ := parser.reader.ReadByte()
-			r.WriteByte(b)
-			isFloat = true
-		} else if bb[0] == 'e' || bb[0] == 'E' {
-			// Exponential number format.
-			b, _ := parser.reader.ReadByte()
-			r.WriteByte(b)
-			isFloat = true
-			allowSigns = true
-		} else {
-			break
-		}
-	}
-
-	var o PdfObject
-	if isFloat {
-		fVal, err := strconv.ParseFloat(r.String(), 64)
-		if err != nil {
-			common.Log.Debug("Error parsing number %v err=%v. Using 0.0. Output may be incorrect", r.String(), err)
-			fVal = 0.0
-			err = nil
-		}
-
-		objFloat := PdfObjectFloat(fVal)
-		o = &objFloat
-	} else {
-		intVal, err := strconv.ParseInt(r.String(), 10, 64)
-		if err != nil {
-			common.Log.Debug("Error parsing number %v err=%v. Using 0. Output may be incorrect", r.String(), err)
-			intVal = 0
-			err = nil
-		}
-
-		objInt := PdfObjectInteger(intVal)
-		o = &objInt
-	}
-
-	return o, nil
+	return ParseNumber(parser.reader)
 }
 
 // A string starts with '(' and ends with ')'.
@@ -1197,7 +1135,7 @@ func (parser *PdfParser) seekToEOFMarker(fSize int64) error {
 	var offset int64
 
 	// Define an buffer length in terms of how many bytes to read from the end of the file.
-	var buflen int64 = 1000
+	var buflen int64 = 2048
 
 	for offset < fSize {
 		if fSize <= (buflen + offset) {
@@ -1224,7 +1162,7 @@ func (parser *PdfParser) seekToEOFMarker(fSize int64) error {
 		}
 
 		common.Log.Debug("Warning: EOF marker not found! - continue seeking")
-		offset += buflen
+		offset += buflen - 4
 	}
 
 	common.Log.Debug("Error: EOF marker was not found.")

diff --git a/core/utils.go b/core/utils.go
@@ -6,10 +6,14 @@
 package core
 
 import (
+	"bufio"
+	"bytes"
 	"errors"
 	"fmt"
+	"io"
 	"reflect"
 	"sort"
+	"strconv"
 
 	"github.com/unidoc/unipdf/v3/common"
 )
@@ -374,3 +378,91 @@ func flattenObject(obj PdfObject, depth int) PdfObject {
 	}
 	return obj
 }
+
+// ParseNumber parses a numeric objects from a buffered stream.
+// Section 7.3.3.
+// Integer or Float.
+//
+// An integer shall be written as one or more decimal digits optionally
+// preceded by a sign. The value shall be interpreted as a signed
+// decimal integer and shall be converted to an integer object.
+//
+// A real value shall be written as one or more decimal digits with an
+// optional sign and a leading, trailing, or embedded PERIOD (2Eh)
+// (decimal point). The value shall be interpreted as a real number
+// and shall be converted to a real object.
+//
+// Regarding exponential numbers: 7.3.3 Numeric Objects:
+// A conforming writer shall not use the PostScript syntax for numbers
+// with non-decimal radices (such as 16#FFFE) or in exponential format
+// (such as 6.02E23).
+// Nonetheless, we sometimes get numbers with exponential format, so
+// we will support it in the reader (no confusion with other types, so
+// no compromise).
+func ParseNumber(buf *bufio.Reader) (PdfObject, error) {
+	isFloat := false
+	allowSigns := true
+	var r bytes.Buffer
+	for {
+		if common.Log.IsLogLevel(common.LogLevelTrace) {
+			common.Log.Trace("Parsing number \"%s\"", r.String())
+		}
+		bb, err := buf.Peek(1)
+		if err == io.EOF {
+			// GH: EOF handling.  Handle EOF like end of line.  Can happen with
+			// encoded object streams that the object is at the end.
+			// In other cases, we will get the EOF error elsewhere at any rate.
+			break // Handle like EOF
+		}
+		if err != nil {
+			common.Log.Debug("ERROR %s", err)
+			return nil, err
+		}
+		if allowSigns && (bb[0] == '-' || bb[0] == '+') {
+			// Only appear in the beginning, otherwise serves as a delimiter.
+			b, _ := buf.ReadByte()
+			r.WriteByte(b)
+			allowSigns = false // Only allowed in beginning, and after e (exponential).
+		} else if IsDecimalDigit(bb[0]) {
+			b, _ := buf.ReadByte()
+			r.WriteByte(b)
+		} else if bb[0] == '.' {
+			b, _ := buf.ReadByte()
+			r.WriteByte(b)
+			isFloat = true
+		} else if bb[0] == 'e' || bb[0] == 'E' {
+			// Exponential number format.
+			b, _ := buf.ReadByte()
+			r.WriteByte(b)
+			isFloat = true
+			allowSigns = true
+		} else {
+			break
+		}
+	}
+
+	var o PdfObject
+	if isFloat {
+		fVal, err := strconv.ParseFloat(r.String(), 64)
+		if err != nil {
+			common.Log.Debug("Error parsing number %v err=%v. Using 0.0. Output may be incorrect", r.String(), err)
+			fVal = 0.0
+			err = nil
+		}
+
+		objFloat := PdfObjectFloat(fVal)
+		o = &objFloat
+	} else {
+		intVal, err := strconv.ParseInt(r.String(), 10, 64)
+		if err != nil {
+			common.Log.Debug("Error parsing number %v err=%v. Using 0. Output may be incorrect", r.String(), err)
+			intVal = 0
+			err = nil
+		}
+
+		objInt := PdfObjectInteger(intVal)
+		o = &objInt
+	}
+
+	return o, nil
+}
diff --git a/creator/chapters.go b/creator/chapters.go
@@ -104,6 +104,7 @@ func (chap *Chapter) NewSubchapter(title string) *Chapter {
 
 // SetShowNumbering sets a flag to indicate whether or not to show chapter numbers as part of title.
 func (chap *Chapter) SetShowNumbering(show bool) {
+	chap.showNumbering = show
 	chap.heading.SetText(chap.headingText())
 }
 

diff --git a/extractor/text_test.go b/extractor/text_test.go
@@ -567,7 +567,7 @@ var textLocTests = []textLocTest{
 			},
 			2: pageContents{
 				termBBox: map[string]model.PdfRectangle{
-					"atrium": r(414.5, 113.5, 435.5, 121.0),
+					"atrium": r(452.78, 407.76, 503.78, 416.26),
 				},
 			},
 			3: pageContents{

diff --git a/fdf/parser.go b/fdf/parser.go
@@ -212,54 +212,7 @@ func (parser *fdfParser) parseName() (core.PdfObjectName, error) {
 // we will support it in the reader (no confusion with other types, so
 // no compromise).
 func (parser *fdfParser) parseNumber() (core.PdfObject, error) {
-	isFloat := false
-	allowSigns := true
-	var r bytes.Buffer
-	for {
-		common.Log.Trace("Parsing number \"%s\"", r.String())
-		bb, err := parser.reader.Peek(1)
-		if err == io.EOF {
-			// GH: EOF handling.  Handle EOF like end of line.  Can happen with
-			// encoded object streams that the object is at the end.
-			// In other cases, we will get the EOF error elsewhere at any rate.
-			break // Handle like EOF
-		}
-		if err != nil {
-			common.Log.Debug("ERROR %s", err)
-			return nil, err
-		}
-		if allowSigns && (bb[0] == '-' || bb[0] == '+') {
-			// Only appear in the beginning, otherwise serves as a delimiter.
-			b, _ := parser.reader.ReadByte()
-			r.WriteByte(b)
-			allowSigns = false // Only allowed in beginning, and after e (exponential).
-		} else if core.IsDecimalDigit(bb[0]) {
-			b, _ := parser.reader.ReadByte()
-			r.WriteByte(b)
-		} else if bb[0] == '.' {
-			b, _ := parser.reader.ReadByte()
-			r.WriteByte(b)
-			isFloat = true
-		} else if bb[0] == 'e' {
-			// Exponential number format.
-			b, _ := parser.reader.ReadByte()
-			r.WriteByte(b)
-			isFloat = true
-			allowSigns = true
-		} else {
-			break
-		}
-	}
-
-	if isFloat {
-		fVal, err := strconv.ParseFloat(r.String(), 64)
-		o := core.PdfObjectFloat(fVal)
-		return &o, err
-	} else {
-		intVal, err := strconv.ParseInt(r.String(), 10, 64)
-		o := core.PdfObjectInteger(intVal)
-		return &o, err
-	}
+	return core.ParseNumber(parser.reader)
 }
 
 // A string starts with '(' and ends with ')'.