diff --git a/samples/bugs/Issue585.pdf b/samples/bugs/Issue585.pdf
new file mode 100644
index 00000000..b282c108
Binary files /dev/null and b/samples/bugs/Issue585.pdf differ
diff --git a/samples/bugs/Issue608.pdf b/samples/bugs/Issue608.pdf
new file mode 100644
index 00000000..6e1e9f58
Binary files /dev/null and b/samples/bugs/Issue608.pdf differ
diff --git a/samples/Issue609.pdf b/samples/bugs/Issue609.pdf
similarity index 100%
rename from samples/Issue609.pdf
rename to samples/bugs/Issue609.pdf
diff --git a/samples/grouped-by-generator/Document_Generated_by_SmallPDF.pdf b/samples/grouped-by-generator/Document_Generated_by_SmallPDF.pdf
new file mode 100644
index 00000000..55a681d1
Binary files /dev/null and b/samples/grouped-by-generator/Document_Generated_by_SmallPDF.pdf differ
diff --git a/samples/grouped-by-generator/Mostly_German_Text_Generated_By_Microsoft_Word_2016.pdf b/samples/grouped-by-generator/Mostly_German_Text_Generated_By_Microsoft_Word_2016.pdf
new file mode 100644
index 00000000..ad2a0e2d
Binary files /dev/null and b/samples/grouped-by-generator/Mostly_German_Text_Generated_By_Microsoft_Word_2016.pdf differ
diff --git a/samples/grouped-by-generator/Mostly_German_Text_Generated_by_Power_PDF_Create.pdf b/samples/grouped-by-generator/Mostly_German_Text_Generated_by_Power_PDF_Create.pdf
new file mode 100644
index 00000000..8557fe5b
Binary files /dev/null and b/samples/grouped-by-generator/Mostly_German_Text_Generated_by_Power_PDF_Create.pdf differ
diff --git "a/samples/grouped-by-generator/Mostly_Turkish_Text_Generated_by_Word_i\303\247in_Acrobat_PDFMaker_17.pdf" "b/samples/grouped-by-generator/Mostly_Turkish_Text_Generated_by_Word_i\303\247in_Acrobat_PDFMaker_17.pdf"
new file mode 100644
index 00000000..d2c40e1d
Binary files /dev/null and "b/samples/grouped-by-generator/Mostly_Turkish_Text_Generated_by_Word_i\303\247in_Acrobat_PDFMaker_17.pdf" differ
diff --git a/samples/grouped-by-generator/R2RML-Spec_Generated_by_Chromium-SaveAs-PDF.pdf b/samples/grouped-by-generator/R2RML-Spec_Generated_by_Chromium-SaveAs-PDF.pdf
new file mode 100644
index 00000000..1220fe85
Binary files /dev/null and b/samples/grouped-by-generator/R2RML-Spec_Generated_by_Chromium-SaveAs-PDF.pdf differ
diff --git a/samples/grouped-by-generator/RichDocument_Generated_by_Libreoffice-6.4_PDF-v1.4.pdf b/samples/grouped-by-generator/RichDocument_Generated_by_Libreoffice-6.4_PDF-v1.4.pdf
new file mode 100644
index 00000000..1ac0eb3c
Binary files /dev/null and b/samples/grouped-by-generator/RichDocument_Generated_by_Libreoffice-6.4_PDF-v1.4.pdf differ
diff --git a/samples/grouped-by-generator/SimpleImage_Generated_by_Inkscape-0.92_PDF-v1.4.pdf b/samples/grouped-by-generator/SimpleImage_Generated_by_Inkscape-0.92_PDF-v1.4.pdf
new file mode 100644
index 00000000..f71ceeac
Binary files /dev/null and b/samples/grouped-by-generator/SimpleImage_Generated_by_Inkscape-0.92_PDF-v1.4.pdf differ
diff --git a/samples/grouped-by-generator/SimpleImage_Generated_by_Inkscape-0.92_PDF-v1.5.pdf b/samples/grouped-by-generator/SimpleImage_Generated_by_Inkscape-0.92_PDF-v1.5.pdf
new file mode 100644
index 00000000..287b9476
Binary files /dev/null and b/samples/grouped-by-generator/SimpleImage_Generated_by_Inkscape-0.92_PDF-v1.5.pdf differ
diff --git a/samples/grouped-by-generator/Wikipedia-PDF_Generated_by_Microsoft_Print-to-PDF.pdf b/samples/grouped-by-generator/Wikipedia-PDF_Generated_by_Microsoft_Print-to-PDF.pdf
new file mode 100644
index 00000000..d0de53c4
Binary files /dev/null and b/samples/grouped-by-generator/Wikipedia-PDF_Generated_by_Microsoft_Print-to-PDF.pdf differ
diff --git a/src/Smalot/PdfParser/Font.php b/src/Smalot/PdfParser/Font.php
index 2e05cff5..378f8109 100644
--- a/src/Smalot/PdfParser/Font.php
+++ b/src/Smalot/PdfParser/Font.php
@@ -386,7 +386,7 @@ public static function decodeEntities(string $text): string
      */
     public static function decodeUnicode(string $text): string
     {
-        if (preg_match('/^\xFE\xFF/i', $text)) {
+        if ("\xFE\xFF" === substr($text, 0, 2)) {
             // Strip U+FEFF byte order marker.
             $decode = substr($text, 2);
             $text = '';
@@ -411,16 +411,17 @@ protected function getFontSpaceLimit(): int
     /**
      * Decode text by commands array.
      */
-    public function decodeText(array $commands): string
+    public function decodeText(array $commands, float $fontFactor = 4): string
     {
         $word_position = 0;
         $words = [];
-        $font_space = $this->getFontSpaceLimit();
+        $font_space = $this->getFontSpaceLimit() * abs($fontFactor) / 4;
 
         foreach ($commands as $command) {
             switch ($command[PDFObject::TYPE]) {
                 case 'n':
-                    if ((float) trim($command[PDFObject::COMMAND]) < $font_space) {
+                    $offset = (float) trim($command[PDFObject::COMMAND]);
+                    if ($offset - (float) $font_space < 0) {
                         $word_position = \count($words);
                     }
                     continue 2;
@@ -451,9 +452,32 @@ public function decodeText(array $commands): string
 
         foreach ($words as &$word) {
             $word = $this->decodeContent($word);
+            $word = str_replace("\t", ' ', $word);
         }
 
-        return implode(' ', $words);
+        // Remove internal "words" that are just spaces, but leave them
+        // if they are at either end of the array of words. This fixes,
+        // for   example,   lines   that   are   justified   to   fill
+        // a whole row.
+        for ($x = \count($words) - 2; $x >= 1; --$x) {
+            if ('' === trim($words[$x], ' ')) {
+                unset($words[$x]);
+            }
+        }
+        $words = array_values($words);
+
+        // Cut down on the number of unnecessary internal spaces by
+        // imploding the array on a pair of null bytes. Where a word
+        // already has a space next to that separator, the separator is
+        // dropped; otherwise it is replaced by a single space.
+        $words = implode("\x00\x00", $words);
+        $words = str_replace(
+            [" \x00\x00 ", "\x00\x00 ", " \x00\x00", "\x00\x00"],
+            ['  ', ' ', ' ', ' '],
+            $words
+        );
+
+        return $words;
     }
 
     /**
@@ -463,6 +487,12 @@ public function decodeText(array $commands): string
      */
     public function decodeContent(string $text, bool &$unicode = null): string
     {
+        // If this string begins with a UTF-16BE BOM, then decode it
+        // directly as Unicode
+        if ("\xFE\xFF" === substr($text, 0, 2)) {
+            return $this->decodeUnicode($text);
+        }
+
         if ($this->has('ToUnicode')) {
             return $this->decodeContentByToUnicodeCMapOrDescendantFonts($text);
         }
diff --git a/src/Smalot/PdfParser/PDFObject.php b/src/Smalot/PdfParser/PDFObject.php
index 798b84d0..30901ecb 100644
--- a/src/Smalot/PdfParser/PDFObject.php
+++ b/src/Smalot/PdfParser/PDFObject.php
@@ -73,6 +73,11 @@ class PDFObject
      */
     protected $config;
 
+    /**
+     * @var bool
+     */
+    protected $addPositionWhitespace = false;
+
     public function __construct(
         Document $document,
         Header $header = null,
@@ -127,6 +132,16 @@ public function getContent(): ?string
         return $this->content;
     }
 
+    /**
+     * Creates a duplicate of the document stream with
+     * strings and other items replaced by $char. Formerly
+     * getSectionsText() used this output to more easily gather offset
+     * values to extract text from the *actual* document stream.
+     *
+     * @deprecated function is no longer used and will be removed in a future release
+     *
+     * @internal
+     */
     public function cleanContent(string $content, string $char = 'X')
     {
         $char = $char[0];
@@ -186,41 +201,204 @@ public function cleanContent(string $content, string $char = 'X')
         return $content;
     }
 
+    /**
+     * Reformats raw PDF content stream text into a multi-line string
+     * with one PDF command on each line, separated by \r\n.
+     *
+     * @param string|null $content unformatted content stream text
+     * @return string formatted stream; '' if $content is null or no text encoding is detected
+     */
+    private function formatContent(?string $content): string
+    {
+        if (null === $content) {
+            return '';
+        }
+
+        // Find all literal strings ( ) and swap each for a placeholder
+        // so they aren't affected by the next steps
+        $pdfstrings = [];
+        $attempt = '(';
+        while (preg_match('/'.preg_quote($attempt, '/').'.*?(?<![^\\\\]\\\\)\)/s', $content, $text)) {
+            // PDF strings can contain unescaped parentheses as long as
+            // they're balanced, so check for balanced parentheses
+            $left = preg_match_all('/(?<![^\\\\]\\\\)\(/', $text[0]);
+            $right = preg_match_all('/(?<![^\\\\]\\\\)\)/', $text[0]);
+
+            if ($left == $right) {
+                // Replace the string with a unique placeholder
+                $id = uniqid('STRING_', true);
+                $pdfstrings[$id] = $text[0];
+                $content = preg_replace(
+                    '/'.preg_quote($text[0], '/').'/',
+                    '@@@'.$id.'@@@',
+                    $content,
+                    1
+                );
+
+                // Reset to search for the next string
+                $attempt = '(';
+            } else {
+                // We had unbalanced parentheses, so use the current
+                // match as a base to find a longer string
+                $attempt = $text[0];
+            }
+        }
+
+        // Replace all carriage returns and line-feeds in the document stream with spaces
+        $content = str_replace(["\r", "\n"], ' ', trim($content));
+
+        // Find all dictionary << >> commands and replace them so they
+        // aren't affected by the next steps
+        $dictstore = [];
+        while (preg_match('/(<<.*?>> *)(BDC|BMC|DP|MP)/', $content, $dicttext)) {
+            $dictid = uniqid('DICT_', true);
+            $dictstore[$dictid] = $dicttext[1];
+            $content = preg_replace(
+                '/'.preg_quote($dicttext[0], '/').'/',
+                ' ###'.$dictid.'###'.$dicttext[2],
+                $content,
+                1
+            );
+        }
+
+        // Now that all strings and dictionaries are hidden, the only
+        // PDF commands left should all be plain text.
+        // Detect text encoding of the current string to prevent reading
+        // content streams that are images, etc. This prevents PHP
+        // error messages when JPEG content is sent to this function
+        // by the sample file '12249.pdf' from:
+        // https://github.com/smalot/pdfparser/issues/458
+        if (false === mb_detect_encoding($content, null, true)) {
+            return '';
+        }
+
+        // Normalize white-space in the document stream
+        $content = preg_replace('/\s{2,}/', ' ', $content);
+
+        // Find all valid PDF operators and add \r\n after each; this
+        // ensures there is just one command on every line
+        // Source: https://ia801001.us.archive.org/1/items/pdf1.7/pdf_reference_1-7.pdf - Appendix A
+        // Source: https://archive.org/download/pdf320002008/PDF32000_2008.pdf - Annex A
+        // Note: PDF Reference 1.7 lists 'I' and 'rI' as valid commands, while
+        //       PDF 32000:2008 lists them as 'i' and 'ri' respectively. Both versions
+        //       appear here in the list for completeness.
+        $operators = [
+          'b*', 'b', 'BDC', 'BMC', 'B*', 'BI', 'BT', 'BX', 'B', 'cm', 'cs', 'c', 'CS',
+          'd0', 'd1', 'd', 'Do', 'DP', 'EMC', 'EI', 'ET', 'EX', 'f*', 'f', 'F', 'gs',
+          'g', 'G',  'h', 'i', 'ID', 'I', 'j', 'J', 'k', 'K', 'l', 'm', 'MP', 'M', 'n',
+          'q', 'Q', 're', 'rg', 'ri', 'rI', 'RG', 'scn', 'sc', 'sh', 's', 'SCN', 'SC',
+          'S', 'T*', 'Tc', 'Td', 'TD', 'Tf', 'TJ', 'Tj', 'TL', 'Tm', 'Tr', 'Ts', 'Tw',
+          'Tz', 'v', 'w', 'W*', 'W', 'y', '\'', '"',
+        ];
+        foreach ($operators as $operator) {
+            $content = preg_replace(
+                '/(?<!\w|\/)'.preg_quote($operator, '/').'(?![\w10\*])/',
+                $operator."\r\n",
+                $content
+            );
+        }
+
+        // Restore the original content of the dictionary << >> commands
+        $dictstore = array_reverse($dictstore, true);
+        foreach ($dictstore as $id => $dict) {
+            $content = str_replace('###'.$id.'###', $dict, $content);
+        }
+
+        // Restore the original string content
+        $pdfstrings = array_reverse($pdfstrings, true);
+        foreach ($pdfstrings as $id => $text) {
+            // Strings may contain escaped newlines, or literal newlines
+            // and we should clean these up before replacing the string
+            // back into the content stream; this ensures no strings are
+            // split between two lines (every command must be on one line)
+            $text = str_replace(
+                ["\\\r\n", "\\\r", "\\\n", "\r", "\n"],
+                ['', '', '', '\r', '\n'],
+                $text
+            );
+
+            $content = str_replace('@@@'.$id.'@@@', $text, $content);
+        }
+
+        $content = trim(preg_replace(['/(\r\n){2,}/', '/\r\n +/'], "\r\n", $content));
+
+        return $content;
+    }
+
+    /**
+     * getSectionsText() now takes an entire, unformatted
+     * document stream as a string, cleans it, then filters out
+     * commands that aren't needed for text positioning/extraction. It
+     * returns an array of unprocessed PDF commands, one command per
+     * element.
+     *
+     * @internal
+     */
     public function getSectionsText(?string $content): array
     {
         $sections = [];
-        $content = ' '.$content.' ';
-        $textCleaned = $this->cleanContent($content, '_');
-
-        // Extract text blocks.
-        if (preg_match_all('/(\sQ)?\s+BT[\s|\(|\[]+(.*?)\s*ET(\sq)?/s', $textCleaned, $matches, \PREG_OFFSET_CAPTURE)) {
-            foreach ($matches[2] as $pos => $part) {
-                $text = $part[0];
-                if ('' === $text) {
-                    continue;
-                }
-                $offset = $part[1];
-                $section = substr($content, $offset, \strlen($text));
 
-                // Removes BDC and EMC markup.
-                $section = preg_replace('/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s', '${3}', $section.' ');
+        // A cleaned stream has one command on every line, so split the
+        // cleaned stream content on \r\n into an array
+        $textCleaned = preg_split(
+            '/(\r\n|\n|\r)/',
+            $this->formatContent($content),
+            -1,
+            \PREG_SPLIT_NO_EMPTY
+        );
 
-                // Add Q and q flags if detected around BT/ET.
-                // @see: https://github.com/smalot/pdfparser/issues/387
-                $section = trim((!empty($matches[1][$pos][0]) ? "Q\n" : '').$section).(!empty($matches[3][$pos][0]) ? "\nq" : '');
+        $inTextBlock = false;
+        foreach ($textCleaned as $line) {
+            $line = trim($line);
 
-                $sections[] = $section;
+            // Skip empty lines
+            if ('' === $line) {
+                continue;
             }
-        }
-
-        // Extract 'do' commands.
-        if (preg_match_all('/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s', $textCleaned, $matches, \PREG_OFFSET_CAPTURE)) {
-            foreach ($matches[1] as $part) {
-                $text = $part[0];
-                $offset = $part[1];
-                $section = substr($content, $offset, \strlen($text));
 
-                $sections[] = $section;
+            // If a 'BT' is encountered, set the $inTextBlock flag
+            if (preg_match('/BT$/', $line)) {
+                $inTextBlock = true;
+                $sections[] = $line;
+
+                // If an 'ET' is encountered, unset the $inTextBlock flag
+            } elseif ('ET' == $line) {
+                $inTextBlock = false;
+                $sections[] = $line;
+            } elseif ($inTextBlock) {
+                // If we are inside a BT ... ET text block, save all lines
+                $sections[] = trim($line);
+            } else {
+                // Otherwise, if we are outside of a text block, only
+                // save specific, necessary lines. Care should be taken
+                // to ensure a command being checked for *only* matches
+                // that command. For instance, a simple search for 'c'
+                // may also match the 'sc' command. See the command
+                // list in the formatContent() method above.
+                // Add more commands to save here as you find them in
+                // weird PDFs!
+                if ('q' == $line[-1] || 'Q' == $line[-1]) {
+                    // Save and restore graphics state commands
+                    $sections[] = $line;
+                } elseif (preg_match('/(?<!\w)B[DM]C$/', $line)) {
+                    // Begin marked content sequence
+                    $sections[] = $line;
+                } elseif (preg_match('/(?<!\w)[DM]P$/', $line)) {
+                    // Marked content point
+                    $sections[] = $line;
+                } elseif (preg_match('/(?<!\w)EMC$/', $line)) {
+                    // End marked content sequence
+                    $sections[] = $line;
+                } elseif (preg_match('/(?<!\w)cm$/', $line)) {
+                    // Graphics position change commands
+                    $sections[] = $line;
+                } elseif (preg_match('/(?<!\w)Tf$/', $line)) {
+                    // Font change commands
+                    $sections[] = $line;
+                } elseif (preg_match('/(?<!\w)Do$/', $line)) {
+                    // Invoke named XObject command
+                    $sections[] = $line;
+                }
             }
         }
 
@@ -247,11 +425,17 @@ private function getDefaultFont(Page $page = null): Font
     }
 
     /**
+     * Decode a '[]TJ' command and attempt to use alternate
+     * fonts if the current font results in output that contains
+     * Unicode control characters.
+     *
+     * @internal
+     *
      * @param array<int,array<string,string|bool>> $command
      */
-    private function getTJUsingFontFallback(Font $font, array $command, Page $page = null): string
+    private function getTJUsingFontFallback(Font $font, array $command, Page $page = null, float $fontFactor = 4): string
     {
-        $orig_text = $font->decodeText($command);
+        $orig_text = $font->decodeText($command, $fontFactor);
         $text = $orig_text;
 
         // If we make this a Config option, we can add a check if it's
@@ -262,8 +446,8 @@ private function getTJUsingFontFallback(Font $font, array $command, Page $page =
             // If the decoded text contains UTF-8 control characters
             // then the font page being used is probably the wrong one.
             // Loop through the rest of the fonts to see if we can get
-            // a good decode.
-            while (preg_match('/[\x00-\x1f\x7f]/u', $text) || false !== strpos(bin2hex($text), '00')) {
+            // a good decode. Allow x09 to x0d which are whitespace.
+            while (preg_match('/[\x00-\x08\x0e-\x1f\x7f]/u', $text) || false !== strpos(bin2hex($text), '00')) {
                 // If we're out of font IDs, then give up and use the
                 // original string
                 if (0 == \count($font_ids)) {
@@ -272,7 +456,7 @@ private function getTJUsingFontFallback(Font $font, array $command, Page $page =
 
                 // Try the next font ID
                 $font = $page->getFont(array_shift($font_ids));
-                $text = $font->decodeText($command);
+                $text = $font->decodeText($command, $fontFactor);
             }
         }
 
@@ -280,148 +464,223 @@ private function getTJUsingFontFallback(Font $font, array $command, Page $page =
     }
 
     /**
+     * Parses a full PDF dictionary string, including its outer << >>
+     * brackets, into a nested array. Unmatched ] or >> are ignored.
+     *
+     * @internal
+     *
      * @throws \Exception
      */
-    public function getText(Page $page = null): string
+    public function parseDictionary(string $dictionary): array
     {
-        $result = '';
-        $sections = $this->getSectionsText($this->content);
-        $current_font = $this->getDefaultFont($page);
-        $clipped_font = $current_font;
+        // Normalize whitespace
+        $dictionary = preg_replace(['/\r/', '/\n/', '/\s{2,}/'], ' ', trim($dictionary));
 
-        $current_position_td = ['x' => false, 'y' => false];
-        $current_position_tm = ['x' => false, 'y' => false];
+        if ('<<' != substr($dictionary, 0, 2)) {
+            throw new \Exception('Not a valid dictionary object.');
+        }
 
-        self::$recursionStack[] = $this->getUniqueId();
+        $parsed = [];
+        $stack = [];
+        $currentName = '';
+        $arrayTypeNumeric = false;
+
+        // Remove outer layer of dictionary, and split on tokens
+        $split = preg_split(
+            '/(<<|>>|\[|\]|\/[^\s\/\[\]\(\)<>]*)/',
+            trim(preg_replace('/^<<|>>$/', '', $dictionary)),
+            -1,
+            \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE
+        );
 
-        foreach ($sections as $section) {
-            $commands = $this->getCommandsText($section);
-            $reverse_text = false;
-            $text = '';
+        foreach ($split as $token) {
+            $token = trim($token);
+            switch ($token) {
+                case '':
+                    break;
 
-            foreach ($commands as $command) {
-                switch ($command[self::OPERATOR]) {
-                    case 'BMC':
-                        if ('ReversedChars' == $command[self::COMMAND]) {
-                            $reverse_text = true;
-                        }
-                        break;
+                    // Open numeric array
+                case '[':
+                    $parsed[$currentName] = [];
+                    $arrayTypeNumeric = true;
 
-                        // set character spacing
-                    case 'Tc':
-                        break;
+                    // Move up one level in the stack
+                    $stack[\count($stack)] = &$parsed;
+                    $parsed = &$parsed[$currentName];
+                    $currentName = '';
+                    break;
 
-                        // move text current point
-                    case 'Td':
-                        $args = preg_split('/\s/s', $command[self::COMMAND]);
-                        $y = array_pop($args);
-                        $x = array_pop($args);
-                        if (((float) $x <= 0)
-                            || (false !== $current_position_td['y'] && (float) $y < (float) $current_position_td['y'])
-                        ) {
-                            // vertical offset
-                            $text .= "\n";
-                        } elseif (false !== $current_position_td['x'] && (float) $x > (float)
-                            $current_position_td['x']
-                        ) {
-                            $text .= $this->config->getHorizontalOffset();
-                        }
-                        $current_position_td = ['x' => $x, 'y' => $y];
-                        break;
+                    // Open hashed array
+                case '<<':
+                    $parsed[$currentName] = [];
+                    $arrayTypeNumeric = false;
 
-                        // move text current point and set leading
-                    case 'TD':
-                        $args = preg_split('/\s/s', $command[self::COMMAND]);
-                        $y = array_pop($args);
-                        $x = array_pop($args);
-                        if ((float) $y < 0) {
-                            $text .= "\n";
-                        } elseif ((float) $x <= 0) {
-                            $text .= ' ';
-                        }
-                        break;
+                    // Move up one level in the stack
+                    $stack[\count($stack)] = &$parsed;
+                    $parsed = &$parsed[$currentName];
+                    $currentName = '';
+                    break;
 
-                    case 'Tf':
-                        list($id) = preg_split('/\s/s', $command[self::COMMAND]);
-                        $id = trim($id, '/');
-                        if (null !== $page) {
-                            $new_font = $page->getFont($id);
-                            // If an invalid font ID is given, do not update the font.
-                            // This should theoretically never happen, as the PDF spec states for the Tf operator:
-                            // "The specified font value shall match a resource name in the Font entry of the default resource dictionary"
-                            // (https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf, page 435)
-                            // But we want to make sure that malformed PDFs do not simply crash.
-                            if (null !== $new_font) {
-                                $current_font = $new_font;
-                            }
+                    // Close numeric array
+                case ']':
+                    // Revert string type arrays back to a single element (not the root)
+                    if ([] !== $stack && \is_array($parsed) && 1 == \count($parsed)
+                        && isset($parsed[0]) && \is_string($parsed[0])
+                        && '' !== $parsed[0] && '/' != $parsed[0][0]) {
+                        $parsed = '['.$parsed[0].']';
+                    }
+                    // Close hashed array; a closer with no opener is ignored
+                    // no break
+                case '>>':
+                    $arrayTypeNumeric = false;
+                    if ([] !== $stack) {
+                        $parsed = &$stack[\count($stack) - 1];
+                        unset($stack[\count($stack) - 1]);
+                    }
+                    break;
+
+                default:
+                    // If value begins with a slash, then this is a name
+                    // Add it to the appropriate array
+                    if ('/' == substr($token, 0, 1)) {
+                        $currentName = substr($token, 1);
+                        if (true == $arrayTypeNumeric) {
+                            $parsed[] = $currentName;
+                            $currentName = '';
                         }
-                        break;
+                    } elseif ('' != $currentName) {
+                        if (false == $arrayTypeNumeric) {
+                            $parsed[$currentName] = $token;
+                        }
+                        $currentName = '';
+                    } else {
+                        $parsed[] = $token;
+                    }
+            }
+        }
 
-                    case 'Q':
-                        // Use clip: restore font.
-                        $current_font = $clipped_font;
-                        break;
+        return $parsed;
+    }
 
-                    case 'q':
-                        // Use clip: save font.
-                        $clipped_font = $current_font;
-                        break;
+    /**
+     * Returns the text content of a PDF as a string, with whitespace
+     * added for spacing and line-breaks where appropriate.
+     * Delegates to getTextArray() with addPositionWhitespace enabled;
+     * the flag is reset afterwards even if getTextArray() throws.
+     */
+    public function getText(Page $page = null): string
+    {
+        $this->addPositionWhitespace = true;
+        try {
+            $result = $this->getTextArray($page);
+        } finally {
+            $this->addPositionWhitespace = false;
+        }
 
-                    case "'":
-                    case 'Tj':
-                        $command[self::COMMAND] = [$command];
-                        // no break
-                    case 'TJ':
-                        $text .= $this->getTJUsingFontFallback(
-                            $current_font,
-                            $command[self::COMMAND],
-                            $page
-                        );
-                        break;
+        return implode('', $result).' ';
+    }
 
-                        // set leading
-                    case 'TL':
-                        $text .= ' ';
+    /**
+     * Returns the text content of a PDF as an array of strings. No
+     * extra whitespace is inserted besides what is actually encoded in
+     * the PDF text.
+     *
+     * @throws \Exception
+     */
+    public function getTextArray(Page $page = null): array
+    {
+        $result = [];
+        $text = [];
+
+        $marked_stack = [];
+        $last_written_position = false;
+
+        $sections = $this->getSectionsText($this->content);
+        $current_font = $this->getDefaultFont($page);
+        $current_font_size = 1;
+        $current_text_leading = 0;
+
+        $current_position = ['x' => false, 'y' => false];
+        $current_position_tm = [
+            'a' => 1, 'b' => 0, 'c' => 0,
+            'i' => 0, 'j' => 1, 'k' => 0,
+            'x' => 0, 'y' => 0, 'z' => 1,
+        ];
+        $current_position_td = ['x' => 0, 'y' => 0];
+        $current_position_cm = [
+            'a' => 1, 'b' => 0, 'c' => 0,
+            'i' => 0, 'j' => 1, 'k' => 0,
+            'x' => 0, 'y' => 0, 'z' => 1,
+        ];
+
+        $clipped_font = [];
+        $clipped_position_cm = [];
+
+        self::$recursionStack[] = $this->getUniqueId();
+
+        foreach ($sections as $section) {
+            $commands = $this->getCommandsText($section);
+            foreach ($commands as $command) {
+                switch ($command[self::OPERATOR]) {
+                    // Begin text object
+                    case 'BT':
+                        // Reset text positioning matrices
+                        $current_position_tm = [
+                            'a' => 1, 'b' => 0, 'c' => 0,
+                            'i' => 0, 'j' => 1, 'k' => 0,
+                            'x' => 0, 'y' => 0, 'z' => 1,
+                        ];
+                        $current_position_td = ['x' => 0, 'y' => 0];
+                        $current_text_leading = 0;
                         break;
 
-                    case 'Tm':
-                        $args = preg_split('/\s/s', $command[self::COMMAND]);
-                        $y = array_pop($args);
-                        $x = array_pop($args);
-                        if (false !== $current_position_tm['x']) {
-                            $delta = abs((float) $x - (float) $current_position_tm['x']);
-                            if ($delta > 10) {
-                                $text .= "\t";
+                        // Begin marked content sequence with property list
+                    case 'BDC':
+                        if (preg_match('/(<<.*>>)$/', $command[self::COMMAND], $match)) {
+                            $dict = $this->parseDictionary($match[1]);
+
+                            // Check for ActualText block
+                            if (isset($dict['ActualText']) && \is_string($dict['ActualText']) && '' !== $dict['ActualText']) {
+                                if ('[' == $dict['ActualText'][0]) {
+                                    // Simulate a 'TJ' command on the stack
+                                    $marked_stack[] = [
+                                        'ActualText' => $this->getCommandsText($dict['ActualText'].'TJ')[0],
+                                    ];
+                                } elseif ('<' == $dict['ActualText'][0] || '(' == $dict['ActualText'][0]) {
+                                    // Simulate a 'Tj' command on the stack
+                                    $marked_stack[] = [
+                                        'ActualText' => $this->getCommandsText($dict['ActualText'].'Tj')[0],
+                                    ];
+                                }
                             }
                         }
-                        if (false !== $current_position_tm['y']) {
-                            $delta = abs((float) $y - (float) $current_position_tm['y']);
-                            if ($delta > 10) {
-                                $text .= "\n";
-                            }
-                        }
-                        $current_position_tm = ['x' => $x, 'y' => $y];
                         break;
 
-                        // set super/subscripting text rise
-                    case 'Ts':
-                        break;
+                        // Begin marked content sequence
+                    case 'BMC':
+                        if ('ReversedChars' == $command[self::COMMAND]) {
+                            // Upon encountering a ReversedChars command,
+                            // add the characters we've built up so far to
+                            // the result array
+                            $result = array_merge($result, $text);
 
-                        // set word spacing
-                    case 'Tw':
-                        break;
+                            // Start a fresh $text array that will contain
+                            // reversed characters
+                            $text = [];
 
-                        // set horizontal scaling
-                    case 'Tz':
-                        $text .= "\n";
-                        break;
-
-                        // move to start of next line
-                    case 'T*':
-                        $text .= "\n";
+                            // Add the reversed text flag to the stack
+                            $marked_stack[] = ['ReversedChars' => true];
+                        }
                         break;
 
-                    case 'Da':
+                        // set graphics position matrix
+                    case 'cm':
+                        $args = preg_split('/\s+/s', $command[self::COMMAND]);
+                        $current_position_cm = [
+                            'a' => (float) $args[0], 'b' => (float) $args[1], 'c' => 0,
+                            'i' => (float) $args[2], 'j' => (float) $args[3], 'k' => 0,
+                            'x' => (float) $args[4], 'y' => (float) $args[5], 'z' => 1,
+                        ];
                         break;
 
                     case 'Do':
@@ -433,178 +692,257 @@ public function getText(Page $page = null): string
                             // @todo $xobject could be a ElementXRef object, which would then throw an error
                             if (\is_object($xobject) && $xobject instanceof self && !\in_array($xobject->getUniqueId(), self::$recursionStack)) {
                                 // Not a circular reference.
-                                $text .= $xobject->getText($page);
+                                $text[] = $xobject->getText($page);
                             }
                         }
                         break;
 
-                    case 'rg':
-                    case 'RG':
-                        break;
-
-                    case 're':
+                        // Marked content point with (DP) & without (MP) property list
+                    case 'DP':
+                    case 'MP':
                         break;
 
-                    case 'co':
+                        // End text object
+                    case 'ET':
                         break;
 
-                    case 'cs':
-                        break;
-
-                    case 'gs':
-                        break;
-
-                    case 'en':
-                        break;
-
-                    case 'sc':
-                    case 'SC':
-                        break;
-
-                    case 'g':
-                    case 'G':
-                        break;
-
-                    case 'V':
-                        break;
-
-                    case 'vo':
-                    case 'Vo':
+                        // Store current selected font and graphics matrix
+                    case 'q':
+                        $clipped_font[] = [$current_font, $current_font_size];
+                        $clipped_position_cm[] = $current_position_cm;
                         break;
 
-                    default:
-                }
-            }
-
-            // Fix Hebrew and other reverse text oriented languages.
-            // @see: https://github.com/smalot/pdfparser/issues/398
-            if ($reverse_text) {
-                $chars = mb_str_split($text, 1, mb_internal_encoding());
-                $text = implode('', array_reverse($chars));
-            }
-
-            $result .= $text;
-        }
-
-        return $result.' ';
-    }
-
-    /**
-     * @throws \Exception
-     */
-    public function getTextArray(Page $page = null): array
-    {
-        $text = [];
-        $sections = $this->getSectionsText($this->content);
-        $current_font = new Font($this->document, null, null, $this->config);
-
-        foreach ($sections as $section) {
-            $commands = $this->getCommandsText($section);
-
-            foreach ($commands as $command) {
-                switch ($command[self::OPERATOR]) {
-                    // set character spacing
-                    case 'Tc':
+                        // Restore previous selected font and graphics matrix
+                    case 'Q':
+                        list($current_font, $current_font_size) = array_pop($clipped_font);
+                        $current_position_cm = array_pop($clipped_position_cm);
                         break;
 
-                        // move text current point
-                    case 'Td':
-                        break;
+                        // End marked content sequence
+                    case 'EMC':
+                        $data = false;
+                        if (\count($marked_stack)) {
+                            $marked = array_pop($marked_stack);
+                            $action = key($marked);
+                            $data = $marked[$action];
+
+                            switch ($action) {
+                                // If we are in ReversedChars mode...
+                                case 'ReversedChars':
+                                    // Reverse the characters we've built up so far
+                                    foreach ($text as $key => $t) {
+                                        $text[$key] = implode('', array_reverse(
+                                            mb_str_split($t, 1, mb_internal_encoding())
+                                        ));
+                                    }
+
+                                    // Add these characters to the result array
+                                    $result = array_merge($result, $text);
+
+                                    // Start a fresh $text array that will contain
+                                    // non-reversed characters
+                                    $text = [];
+                                    break;
 
-                        // move text current point and set leading
-                    case 'TD':
-                        break;
+                                case 'ActualText':
+                                    // Use the content of the ActualText as a command
+                                    $command = $data;
+                                    break;
+                            }
+                        }
 
-                    case 'Tf':
-                        if (null !== $page) {
-                            list($id) = preg_split('/\s/s', $command[self::COMMAND]);
-                            $id = trim($id, '/');
-                            $current_font = $page->getFont($id);
+                        // If this EMC command has been transformed into a 'Tj'
+                        // or 'TJ' command because of being ActualText, then bypass
+                        // the break to proceed to the writing section below.
+                        if ('Tj' != $command[self::OPERATOR] && 'TJ' != $command[self::OPERATOR]) {
+                            break;
                         }
-                        break;
 
+                        // no break
                     case "'":
+                    case '"':
+                        if ("'" == $command[self::OPERATOR] || '"' == $command[self::OPERATOR]) {
+                            // Move to next line and write text
+                            $current_position['x'] = 0;
+                            $current_position_td['x'] = 0;
+                            $current_position_td['y'] += $current_text_leading;
+                        }
+                        // no break
                     case 'Tj':
                         $command[self::COMMAND] = [$command];
                         // no break
                     case 'TJ':
-                        $text[] = $this->getTJUsingFontFallback(
+                        // Check the marked content stack for flags
+                        $actual_text = false;
+                        $reverse_text = false;
+                        foreach ($marked_stack as $marked) {
+                            if (isset($marked['ActualText'])) {
+                                $actual_text = true;
+                            }
+                            if (isset($marked['ReversedChars'])) {
+                                $reverse_text = true;
+                            }
+                        }
+
+                        // Account for text position ONLY just before we write text
+                        if (false === $actual_text && \is_array($last_written_position)) {
+                            // If $last_written_position is an array, that
+                            // means we have stored text position coordinates
+                            // for placing an ActualText
+                            $currentX = $last_written_position[0];
+                            $currentY = $last_written_position[1];
+                            $last_written_position = false;
+                        } else {
+                            $currentX = $current_position_cm['x'] + $current_position_tm['x'] + $current_position_td['x'];
+                            $currentY = $current_position_cm['y'] + $current_position_tm['y'] + $current_position_td['y'];
+                        }
+                        $whiteSpace = '';
+
+                        $factorX = -$current_font_size * $current_position_tm['a'] - $current_font_size * $current_position_tm['i'];
+                        $factorY = $current_font_size * $current_position_tm['b'] + $current_font_size * $current_position_tm['j'];
+
+                        if (true === $this->addPositionWhitespace && false !== $current_position['x']) {
+                            $curY = $currentY - $current_position['y'];
+                            if (abs($curY) >= abs($factorY) / 4) {
+                                $whiteSpace = "\n";
+                            } else {
+                                if (true === $reverse_text) {
+                                    $curX = $current_position['x'] - $currentX;
+                                } else {
+                                    $curX = $currentX - $current_position['x'];
+                                }
+
+                                // In abs($factorX * 7) below, the 7 is chosen arbitrarily
+                                // as the number of apparent "spaces" in a document we
+                                // would need before considering them a "tab". In the
+                                // future, we might offer this value to users as a config
+                                // option.
+                                if ($curX >= abs($factorX * 7)) {
+                                    $whiteSpace = "\t";
+                                } elseif ($curX >= abs($factorX * 2)) {
+                                    $whiteSpace = ' ';
+                                }
+                            }
+                        }
+
+                        $newtext = $this->getTJUsingFontFallback(
                             $current_font,
                             $command[self::COMMAND],
-                            $page
+                            $page,
+                            $factorX
                         );
-                        break;
 
-                        // set leading
-                    case 'TL':
-                        break;
+                        // If there is no ActualText pending then write
+                        if (false === $actual_text) {
+                            $newtext = str_replace(["\r", "\n"], '', $newtext);
+                            if (false !== $reverse_text) {
+                                // If we are in ReversedChars mode, add the whitespace last
+                                $text[] = preg_replace('/  $/', ' ', $newtext.$whiteSpace);
+                            } else {
+                                // Otherwise add the whitespace first
+                                if (' ' === $whiteSpace && isset($text[\count($text) - 1])) {
+                                    $text[\count($text) - 1] = preg_replace('/ $/', '', $text[\count($text) - 1]);
+                                }
+                                $text[] = preg_replace('/^[ \t]{2}/', ' ', $whiteSpace.$newtext);
+                            }
 
-                    case 'Tm':
+                            // Record the position of this inserted text for comparison
+                            // with the next text block.
+                            // Provide a 'fudge' factor guess on how wide this text block
+                            // is based on the number of characters. This helps limit the
+                            // number of tabs inserted, but isn't perfect.
+                            $factor = $factorX / 2;
+                            $current_position = [
+                                'x' => $currentX - mb_strlen($newtext) * $factor,
+                                'y' => $currentY,
+                            ];
+                        } elseif (false === $last_written_position) {
+                            // If there is an ActualText in the pipeline
+                            // store the position this undisplayed text
+                            // *would* have been written to, so the
+                            // ActualText is displayed in the right spot
+                            $last_written_position = [$currentX, $currentY];
+                            $current_position['x'] = $currentX;
+                        }
                         break;
 
-                        // set super/subscripting text rise
-                    case 'Ts':
+                        // move to start of next line
+                    case 'T*':
+                        $current_position['x'] = 0;
+                        $current_position_td['x'] = 0;
+                        $current_position_td['y'] += $current_text_leading;
                         break;
 
-                        // set word spacing
-                    case 'Tw':
+                        // set character spacing
+                    case 'Tc':
                         break;
 
-                        // set horizontal scaling
-                    case 'Tz':
-                        // $text .= "\n";
-                        break;
+                        // move text current point and set leading
+                    case 'Td':
+                    case 'TD':
+                        // move text current point
+                        $args = preg_split('/\s+/s', $command[self::COMMAND]);
+                        $y = (float) array_pop($args);
+                        $x = (float) array_pop($args);
 
-                        // move to start of next line
-                    case 'T*':
-                        // $text .= "\n";
-                        break;
+                        if ('TD' == $command[self::OPERATOR]) {
+                            $current_text_leading = -$y * $current_position_tm['b'] - $y * $current_position_tm['j'];
+                        }
 
-                    case 'Da':
+                        $current_position_td = [
+                            'x' => $current_position_td['x'] + $x * $current_position_tm['a'] + $x * $current_position_tm['i'],
+                            'y' => $current_position_td['y'] + $y * $current_position_tm['b'] + $y * $current_position_tm['j'],
+                        ];
                         break;
 
-                    case 'Do':
+                    case 'Tf':
+                        $args = preg_split('/\s/s', $command[self::COMMAND]);
+                        $size = (float) array_pop($args);
+                        $id = trim(array_pop($args), '/');
                         if (null !== $page) {
-                            $args = preg_split('/\s/s', $command[self::COMMAND]);
-                            $id = trim(array_pop($args), '/ ');
-                            if ($xobject = $page->getXObject($id)) {
-                                $text[] = $xobject->getText($page);
+                            $new_font = $page->getFont($id);
+                            // If an invalid font ID is given, do not update the font.
+                            // This should theoretically never happen, as the PDF spec states for the Tf operator:
+                            // "The specified font value shall match a resource name in the Font entry of the default resource dictionary"
+                            // (https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf, page 435)
+                            // But we want to make sure that malformed PDFs do not simply crash.
+                            if (null !== $new_font) {
+                                $current_font = $new_font;
+                                $current_font_size = $size;
                             }
                         }
                         break;
 
-                    case 'rg':
-                    case 'RG':
-                        break;
-
-                    case 're':
-                        break;
-
-                    case 'co':
-                        break;
-
-                    case 'cs':
-                        break;
-
-                    case 'gs':
+                        // set leading
+                    case 'TL':
+                        $y = (float) $command[self::COMMAND];
+                        $current_text_leading = -$y * $current_position_tm['b'] + -$y * $current_position_tm['j'];
                         break;
 
-                    case 'en':
+                        // set text position matrix
+                    case 'Tm':
+                        $args = preg_split('/\s+/s', $command[self::COMMAND]);
+                        $current_position_tm = [
+                            'a' => (float) $args[0], 'b' => (float) $args[1], 'c' => 0,
+                            'i' => (float) $args[2], 'j' => (float) $args[3], 'k' => 0,
+                            'x' => (float) $args[4], 'y' => (float) $args[5], 'z' => 1,
+                        ];
                         break;
 
-                    case 'sc':
-                    case 'SC':
+                        // NOTE(review): labelled "text rendering mode", but that PDF operator is 'Tr'; 'Ts' (text rise) is repeated just below
+                    case 'Ts':
                         break;
 
-                    case 'g':
-                    case 'G':
+                        // set super/subscripting text rise
+                    case 'Ts':
                         break;
 
-                    case 'V':
+                        // set word spacing
+                    case 'Tw':
                         break;
 
-                    case 'vo':
-                    case 'Vo':
+                        // set horizontal scaling
+                    case 'Tz':
                         break;
 
                     default:
@@ -612,198 +950,103 @@ public function getTextArray(Page $page = null): array
             }
         }
 
-        return $text;
+        $result = array_merge($result, $text);
+
+        return $result;
     }
 
+    /**
+     * Parses one pre-formatted, single-line command from a document
+     * stream into its type, operator and operand data. The companion
+     * function getSectionsText() returns a document stream as an array
+     * of single commands for just this purpose. Because of this, the
+     * argument $offset is no longer used, and may be removed in a
+     * future PdfParser release. A better name for this function would
+     * be getCommandText() since it now always works on one command.
+     *
+     * @return array empty if no operator is matched, else one entry
+     */
     public function getCommandsText(string $text_part, int &$offset = 0): array
     {
         $commands = $matches = [];
 
-        while ($offset < \strlen($text_part)) {
-            $offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset);
-            $char = $text_part[$offset];
-
-            $operator = '';
-            $type = '';
-            $command = false;
-
-            switch ($char) {
-                case '/':
-                    $type = $char;
-                    if (preg_match(
-                        '/\G\/([A-Z0-9\._,\+-]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si',
-                        $text_part,
-                        $matches,
-                        0,
-                        $offset
-                    )
-                    ) {
-                        $operator = $matches[2];
-                        $command = $matches[1];
-                        $offset += \strlen($matches[0]);
-                    } elseif (preg_match(
-                        '/\G\/([A-Z0-9\._,\+-]+)\s+([A-Z]+)\s*/si',
-                        $text_part,
-                        $matches,
-                        0,
-                        $offset
-                    )
-                    ) {
-                        $operator = $matches[2];
-                        $command = $matches[1];
-                        $offset += \strlen($matches[0]);
-                    }
-                    break;
-
-                case '[':
-                case ']':
-                    // array object
-                    $type = $char;
-                    if ('[' == $char) {
-                        ++$offset;
-                        // get elements
-                        $command = $this->getCommandsText($text_part, $offset);
-
-                        if (preg_match(
-                            '/\G\s*[A-Z]{1,2}\s*/si',
-                            $text_part,
-                            $matches,
-                            0,
-                            $offset
-                        )
-                        ) {
-                            $operator = trim($matches[0]);
-                            $offset += \strlen($matches[0]);
-                        }
-                    } else {
-                        ++$offset;
-                        break;
-                    }
-                    break;
-
-                case '<':
-                case '>':
-                    // array object
-                    $type = $char;
-                    ++$offset;
-                    if ('<' == $char) {
-                        $strpos = strpos($text_part, '>', $offset);
-                        $command = substr($text_part, $offset, $strpos - $offset);
-                        $offset = $strpos + 1;
                     }
+        preg_match('/^(([\/\[\(<])?.*)(?<!\w)([a-z01\'\"*]+)$/i', $text_part, $matches); // groups: 1 = operand text, 2 = optional leading delimiter, 3 = operator
 
-                    if (preg_match(
-                        '/\G\s*[A-Z]{1,2}\s*/si',
-                        $text_part,
-                        $matches,
-                        0,
-                        $offset
-                    )
-                    ) {
-                        $operator = trim($matches[0]);
-                        $offset += \strlen($matches[0]);
-                    }
-                    break;
-
-                case '(':
-                case ')':
-                    ++$offset;
-                    $type = $char;
-                    $strpos = $offset;
-                    if ('(' == $char) {
-                        $open_bracket = 1;
-                        while ($open_bracket > 0) {
-                            if (!isset($text_part[$strpos])) {
-                                break;
-                            }
-                            $ch = $text_part[$strpos];
-                            switch ($ch) {
-                                case '\\':
-                                    // REVERSE SOLIDUS (5Ch) (Backslash)
-                                    // skip next character
-                                    ++$strpos;
-                                    break;
-
-                                case '(':
-                                    // LEFT PARENHESIS (28h)
-                                    ++$open_bracket;
                                     break;
+        // If the line does not match the pattern above, return an empty array
+        if (!isset($matches[1]) || !isset($matches[2]) || !isset($matches[3])) {
+            return [];
+        }
 
-                                case ')':
-                                    // RIGHT PARENTHESIS (29h)
-                                    --$open_bracket;
-                                    break;
-                            }
-                            ++$strpos;
-                        }
-                        $command = substr($text_part, $offset, $strpos - $offset - 1);
-                        $offset = $strpos;
-
-                        if (preg_match(
-                            '/\G\s*([A-Z\']{1,2})\s*/si',
-                            $text_part,
-                            $matches,
-                            0,
-                            $offset
-                        )
-                        ) {
-                            $operator = $matches[1];
-                            $offset += \strlen($matches[0]);
-                        }
+        $type = $matches[2]; // leading delimiter /, [, ( or <, if any
+        $operator = $matches[3]; // trailing operator token
+        $command = trim($matches[1]); // everything before the operator
+
+        if ('TJ' == $operator) { // array form: split into string and number elements
+            $subcommand = [];
+            $command = trim($command, '[]');
+            do {
+                $oldCommand = $command;
+
+                // Search for parentheses string () format
+                if (preg_match('/^ *\((.*?)(?<![^\\\\]\\\\)\) *(-?[\d.]+)?/', $command, $tjmatch)) {
+                    $subcommand[] = [
+                        self::TYPE => '(',
+                        self::OPERATOR => 'TJ',
+                        self::COMMAND => $tjmatch[1],
+                    ];
+                    if (isset($tjmatch[2]) && trim($tjmatch[2])) { // optional number after the string (a bare 0 is falsy, so skipped)
+                        $subcommand[] = [
+                            self::TYPE => 'n',
+                            self::OPERATOR => '',
+                            self::COMMAND => $tjmatch[2],
+                        ];
                     }
-                    break;
+                    $command = substr($command, \strlen($tjmatch[0])); // consume the matched element
+                }
 
-                default:
-                    if ('ET' == substr($text_part, $offset, 2)) {
-                        break;
-                    } elseif (preg_match(
-                        '/\G\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si',
-                        $text_part,
-                        $matches,
-                        0,
-                        $offset
-                    )
-                    ) {
-                        $operator = trim($matches['id']);
-                        $command = trim($matches['data']);
-                        $offset += \strlen($matches[0]);
-                    } elseif (preg_match(
-                        '/\G\s*([0-9\.\-]+\s*?)+\s*/si',
-                        $text_part,
-                        $matches,
-                        0,
-                        $offset
-                    )
-                    ) {
-                        $type = 'n';
-                        $command = trim($matches[0]);
-                        $offset += \strlen($matches[0]);
-                    } elseif (preg_match(
-                        '/\G\s*([A-Z\*]+)\s*/si',
-                        $text_part,
-                        $matches,
-                        0,
-                        $offset
-                    )
-                    ) {
-                        $type = '';
-                        $operator = $matches[1];
-                        $command = '';
-                        $offset += \strlen($matches[0]);
+                // Search for hexadecimal <> format
+                if (preg_match('/^ *<([0-9a-f\s]*)> *(-?[\d.]+)?/i', $command, $tjmatch)) {
+                    $tjmatch[1] = preg_replace('/\s/', '', $tjmatch[1]);
+                    $subcommand[] = [
+                        self::TYPE => '<',
+                        self::OPERATOR => 'TJ',
+                        self::COMMAND => $tjmatch[1],
+                    ];
+                    if (isset($tjmatch[2]) && trim($tjmatch[2])) { // optional number after the string (a bare 0 is falsy, so skipped)
+                        $subcommand[] = [
+                            self::TYPE => 'n',
+                            self::OPERATOR => '',
+                            self::COMMAND => $tjmatch[2],
+                        ];
                     }
+                    $command = substr($command, \strlen($tjmatch[0])); // consume the matched element
+                }
+            } while ($command != $oldCommand); // stop once neither pattern consumed anything
+
+            $command = $subcommand;
+        } elseif ('Tj' == $operator || "'" == $operator || '"' == $operator) {
+            // Depending on the string type, trim the data of the
+            // appropriate delimiters
+            if ('(' == $type) {
+                // Don't use trim() here since a () string may end with
+                // a balanced or escaped right parenthesis, and trim()
+                // will delete both. Both strings below are valid:
+                //   eg. (String())
+                //   eg. (String\))
+                $command = preg_replace('/^\(|\)$/', '', $command); // strip only the outermost ( and )
+            } elseif ('<' == $type) {
+                $command = trim($command, '<>');
             }
-
-            if (false !== $command) {
-                $commands[] = [
-                    self::TYPE => $type,
-                    self::OPERATOR => $operator,
-                    self::COMMAND => $command,
-                ];
-            } else {
-                break;
-            }
+        } elseif ('/' == $type) {
+            $command = substr($command, 1); // drop the leading slash
         }
 
+        $commands[] = [
+            self::TYPE => $type,
+            self::OPERATOR => $operator,
+            self::COMMAND => $command,
+        ];
+
         return $commands;
     }
 
diff --git a/src/Smalot/PdfParser/Page.php b/src/Smalot/PdfParser/Page.php
index 10afb523..b8002bd3 100644
--- a/src/Smalot/PdfParser/Page.php
+++ b/src/Smalot/PdfParser/Page.php
@@ -400,8 +400,6 @@ public function extractRawData(): array
             }
             $sectionsText = $content->getSectionsText($content->getContent());
             foreach ($sectionsText as $sectionText) {
-                $extractedData[] = ['t' => '', 'o' => 'BT', 'c' => ''];
-
                 $commandsText = $content->getCommandsText($sectionText);
                 foreach ($commandsText as $command) {
                     $extractedData[] = $command;
@@ -701,6 +699,12 @@ public function getDataTm(array $dataCommands = null): array
         $extractedTexts = $this->getTextArray();
         $extractedData = [];
         foreach ($dataCommands as $command) {
+            // If we've used up all the texts from getTextArray(), exit
+            // so we aren't accessing non-existent array indices
+            // Fixes 'undefined array key' errors in Issues #575, #576
+            if (\count($extractedTexts) <= \count($extractedData)) {
+                break;
+            }
             $currentText = $extractedTexts[\count($extractedData)];
             switch ($command['o']) {
                 /*
@@ -716,15 +720,9 @@ public function getDataTm(array $dataCommands = null): array
 
                     /*
                      * ET
-                     * End a text object, discarding the text matrix
+                     * End a text object
                      */
                 case 'ET':
-                    $Tm = $defaultTm;
-                    $Tl = $defaultTl;
-                    $Tx = 0;
-                    $Ty = 0;
-                    $fontId = $defaultFontId;
-                    $fontSize = $defaultFontSize;
                     break;
 
                     /*
@@ -739,7 +737,7 @@ public function getDataTm(array $dataCommands = null): array
 
                     /*
                      * tx ty Td
-                     * Move to the start of the next line, offset form the start of the
+                     * Move to the start of the next line, offset from the start of the
                      * current line by tx, ty.
                      */
                 case 'Td':
diff --git a/tests/PHPUnit/Integration/DocumentGeneratorFocusTest.php b/tests/PHPUnit/Integration/DocumentGeneratorFocusTest.php
new file mode 100644
index 00000000..f49f6f2d
--- /dev/null
+++ b/tests/PHPUnit/Integration/DocumentGeneratorFocusTest.php
@@ -0,0 +1,225 @@
+<?php
+
+/**
+ * @file This file is part of the PdfParser library.
+ *
+ * @author  Konrad Abicht <k.abicht@gmail.com>
+ *
+ * @date    2020-06-01
+ *
+ * @license LGPLv3
+ *
+ * @url     <https://github.com/smalot/pdfparser>
+ *
+ *  PdfParser is a pdf library written in PHP, extraction oriented.
+ *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program.
+ *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
+ */
+
+namespace PHPUnitTests\Integration;
+
+use PHPUnitTests\TestCase;
+use Smalot\PdfParser\Parser;
+
+/**
+ * Document related tests which focus on certain PDF generators.
+ */
+class DocumentGeneratorFocusTest extends TestCase
+{
+    /**
+     * Test getText result.
+     *
+     * PDF generated with Chromium 116 via SaveAs-dialog.
+     */
+    public function testGetTextPull634Chromium(): void
+    {
+        $document = (new Parser())->parseFile($this->rootDir.'/samples/grouped-by-generator/R2RML-Spec_Generated_by_Chromium-SaveAs-PDF.pdf');
+
+        self::assertStringContainsString('R2RML: RDB to RDF Mapping Language', $document->getText());
+    }
+
+    /**
+     * Test getText result.
+     *
+     * PDF (v 1.4) generated with Inkscape 0.92.
+     */
+    public function testGetTextPull634InkscapePDF14(): void
+    {
+        $document = (new Parser())->parseFile($this->rootDir.'/samples/grouped-by-generator/SimpleImage_Generated_by_Inkscape-0.92_PDF-v1.4.pdf');
+
+        self::assertEquals('TEST', $document->getText());
+    }
+
+    /**
+     * Test getText result.
+     *
+     * PDF (v 1.5) generated with Inkscape 0.92.
+     */
+    public function testGetTextPull634InkscapePDF15(): void
+    {
+        $document = (new Parser())->parseFile($this->rootDir.'/samples/grouped-by-generator/SimpleImage_Generated_by_Inkscape-0.92_PDF-v1.5.pdf');
+
+        self::assertEquals('TEST', $document->getText());
+    }
+
+    /**
+     * Test getText result.
+     *
+     * PDF (1.4) generated with LibreOffice Writer (6.4).
+     *
+     * @see https://help.libreoffice.org/6.4/en-US/text/shared/01/ref_pdf_export.html
+     */
+    public function testGetTextPull634LibreOffice(): void
+    {
+        $document = (new Parser())->parseFile($this->rootDir.'/samples/grouped-by-generator/RichDocument_Generated_by_Libreoffice-6.4_PDF-v1.4.pdf');
+
+        self::assertStringContainsString(
+            'Some currency symbols: £, €, ¥'."\n".'German characters: ÄÖÜß',
+            $document->getText()
+        );
+    }
+
+    /**
+     * Test getText result.
+     *
+     * PDF (v 1.7) generated with Microsoft Print-to-PDF via Firefox.
+     */
+    public function testGetTextPull634MicrosoftPDF17(): void
+    {
+        $document = (new Parser())->parseFile($this->rootDir.'/samples/grouped-by-generator/Wikipedia-PDF_Generated_by_Microsoft_Print-to-PDF.pdf');
+
+        $outputText = $document->getText();
+
+        self::assertStringContainsString(
+            'Adobe PDF icon'."\n".'Filename'."\n".'extension',
+            $outputText
+        );
+
+        self::assertStringContainsString(
+            'are necessary to make, use, sell, and distribute PDF-compliant',
+            $outputText
+        );
+    }
+
+    /**
+     * Test Document functions.
+     *
+     * PDF (v 1.5) generated by Microsoft Word 2016.
+     */
+    public function testGetTextPull634MicrosoftWord2016(): void
+    {
+        $path = $this->rootDir.'/samples/grouped-by-generator/Mostly_German_Text_Generated_By_Microsoft_Word_2016.pdf';
+        $document = (new Parser())->parseFile($path);
+
+        $outputText = $document->getText();
+
+        self::assertStringContainsString('(einschließlich Marktpflegequote) von 4 Mrd € angestrebt.', $outputText);
+
+        // check whitespaces and tab usage
+        self::assertStringContainsString(
+            //           ,--- here is a tab
+            'Fälligkeit: 	19. Oktober 2028 '."\n".
+            'Zinszahlung: 19. Oktober gzj., Zinslaufbeginn 15. Juni 2023',
+            $outputText
+        );
+    }
+
+    /**
+     * Test getText result.
+     *
+     * PDF (v 1.5) generated with Power PDF Create.
+     */
+    public function testGetTextPull634PowerPDFCreate(): void
+    {
+        $document = (new Parser())->parseFile($this->rootDir.'/samples/grouped-by-generator/Mostly_German_Text_Generated_by_Power_PDF_Create.pdf');
+
+        $outputText = $document->getText();
+
+        // located on page 1
+        self::assertStringContainsString(
+            'Index-Verhältniszahl: 1,17812 (am Valutierungstag 7. September 2023)',
+            $outputText
+        );
+
+        // located on page 2
+        self::assertStringContainsString(
+            'Einbeziehung in den '."\n".
+            'Börsenhandel: Dienstag, 5. September 2023 '."\n".
+            'Valutierungstag: Donnerstag, 7. September 2023',
+            $outputText
+        );
+    }
+
+    /**
+     * Test getText result.
+     *
+     * PDF generated from .docx with SmallPDF (https://smallpdf.com)
+     */
+    public function testGetTextPull634SmallPDF(): void
+    {
+        $document = (new Parser())->parseFile($this->rootDir.'/samples/grouped-by-generator/Document_Generated_by_SmallPDF.pdf');
+
+        $outputText = $document->getText();
+
+        // Actual encoded spaces in the document are preserved
+        self::assertStringContainsString(
+            'SmallPDF                       SMALLPDF                             SmallPDF',
+            $outputText
+        );
+
+        // Hebrew text
+        self::assertStringContainsString(
+            'Hebrew Keyboard - תדלקמ תירבעב - Type Hebrew Online',
+            $outputText
+        );
+
+        // Russian text
+        self::assertStringContainsString(
+            'Russian Keyboard - русская клавиатура - Type Russian',
+            $outputText
+        );
+    }
+
+    /**
+     * Test getText result.
+     *
+     * PDF (1.6) generated by Word için Acrobat PDFMaker 17.
+     */
+    public function testGetTextPull634WordIcinAcrobatPDFMaker17(): void
+    {
+        $path = $this->rootDir.'/samples/grouped-by-generator/Mostly_Turkish_Text_Generated_by_Word_için_Acrobat_PDFMaker_17.pdf';
+        $document = (new Parser())->parseFile($path);
+
+        $outputText = $document->getText();
+
+        self::assertStringContainsString(
+            'İhracat ve döviz kazandırıcı hizmetler reeskont kredisi günlük',
+            $outputText
+        );
+
+        // Unnecessary tabs are not inserted due to font-size being 1,
+        // but the text-matrix scale is 9 or 10
+        self::assertStringContainsString(
+            'dikkate alınmasına devam edilecektir.',
+            $outputText
+        );
+
+        // This encoded segment contains an escaped backslash right before
+        // an octal code: \\\000. Account for this in Font::decodeOctal()
+        // See: https://github.com/smalot/pdfparser/pull/640
+        self::assertStringContainsString('Sayı: 2023-34', $outputText);
+    }
+}
diff --git a/tests/PHPUnit/Integration/DocumentIssueFocusTest.php b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php
new file mode 100644
index 00000000..7c7fe7e6
--- /dev/null
+++ b/tests/PHPUnit/Integration/DocumentIssueFocusTest.php
@@ -0,0 +1,114 @@
+<?php
+
+/**
+ * @file This file is part of the PdfParser library.
+ *
+ * @author  Konrad Abicht <k.abicht@gmail.com>
+ *
+ * @date    2020-06-01
+ *
+ * @author  Sébastien MALOT <sebastien@malot.fr>
+ *
+ * @date    2017-01-03
+ *
+ * @license LGPLv3
+ *
+ * @url     <https://github.com/smalot/pdfparser>
+ *
+ *  PdfParser is a pdf library written in PHP, extraction oriented.
+ *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program.
+ *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
+ */
+
+namespace PHPUnitTests\Integration;
+
+use PHPUnitTests\TestCase;
+use Smalot\PdfParser\Document;
+use Smalot\PdfParser\Parser;
+
+/**
+ * Document related tests which are related to certain issues.
+ */
+class DocumentIssueFocusTest extends TestCase
+{
+    /**
+     * Tests getText method without a given page limit.
+     *
+     * @see https://github.com/smalot/pdfparser/pull/562
+     */
+    public function testGetTextNoPageLimit(): void
+    {
+        $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/Issue331.pdf');
+
+        self::assertStringContainsString('Medeni Usul ve İcra İflas Hukuku', $document->getText());
+    }
+
+    /**
+     * Tests getText method with a given page limit.
+     *
+     * @see https://github.com/smalot/pdfparser/pull/562
+     */
+    public function testGetTextWithPageLimit(): void
+    {
+        $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/Issue331.pdf');
+
+        // given text is on page 2, it has to be ignored because of that
+        self::assertStringNotContainsString('Medeni Usul ve İcra İflas Hukuku', $document->getText(1));
+    }
+
+    /**
+     * Tests extraction of XMP Metadata vs. getHeader() data.
+     *
+     * @see https://github.com/smalot/pdfparser/pull/606
+     */
+    public function testExtractXMPMetadata(): void
+    {
+        $document = (new Parser())->parseFile($this->rootDir.'/samples/XMP_Metadata.pdf');
+
+        $details = $document->getDetails();
+
+        // Test that the dc:title data was extracted from the XMP
+        // Metadata.
+        self::assertStringContainsString("Enhance PdfParser\u{2019}s Metadata Capabilities", $details['dc:title']);
+    }
+
+    /**
+     * Tests PDFDocEncoding decode of Document Properties
+     *
+     * @see https://github.com/smalot/pdfparser/issues/609
+     */
+    public function testPDFDocEncodingDecode(): void
+    {
+        $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/Issue609.pdf');
+
+        $details = $document->getDetails();
+
+        // These test that Adobe-inserted \r are removed from a UTF-8
+        // escaped metadata string, and the surrounding characters are
+        // repaired
+        $testKeywords = '˘ˇˆ˙˝˛˞˜•†‡…—–ƒ⁄‹›−‰„“”‘’‚™ﬁﬂŁŒŠŸŽıłœšž€¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ';
+        self::assertStringContainsString($testKeywords, $details['Keywords']);
+
+        $testKeywords = 'added line-feeds often destroy multibyte characters';
+        self::assertStringContainsString($testKeywords, $details['Keywords']);
+
+        // This tests that the PDFDocEncoding characters that differ
+        // from CP-1252 are decoded to their correct UTF-8 code points
+        // as well as removing \r line-feeds
+        $testSubject = '•†‡…—–ƒ⁄‹›−‰„“”‘’‚™ŁŒŠŸŽıłœšž';
+        self::assertStringContainsString($testSubject, $details['Subject']);
+    }
+}
diff --git a/tests/PHPUnit/Integration/DocumentTest.php b/tests/PHPUnit/Integration/DocumentTest.php
index 88137888..26553fca 100644
--- a/tests/PHPUnit/Integration/DocumentTest.php
+++ b/tests/PHPUnit/Integration/DocumentTest.php
@@ -40,9 +40,11 @@
 use Smalot\PdfParser\Header;
 use Smalot\PdfParser\Page;
 use Smalot\PdfParser\Pages;
-use Smalot\PdfParser\Parser;
 use Smalot\PdfParser\PDFObject;
 
+/**
+ * General Document related tests.
+ */
 class DocumentTest extends TestCase
 {
     protected function getDocumentInstance(): Document
@@ -230,72 +232,4 @@ public function testGetPagesMissingCatalog(): void
         $document = $this->getDocumentInstance();
         $document->getPages();
     }
-
-    /**
-     * Tests getText method without a given page limit.
-     *
-     * @see https://github.com/smalot/pdfparser/pull/562
-     */
-    public function testGetTextNoPageLimit(): void
-    {
-        $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/Issue331.pdf');
-
-        self::assertStringContainsString('Medeni Usul ve İcra İflas Hukuku', $document->getText());
-    }
-
-    /**
-     * Tests getText method with a given page limit.
-     *
-     * @see https://github.com/smalot/pdfparser/pull/562
-     */
-    public function testGetTextWithPageLimit(): void
-    {
-        $document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/Issue331.pdf');
-
-        // given text is on page 2, it has to be ignored because of that
-        self::assertStringNotContainsString('Medeni Usul ve İcra İflas Hukuku', $document->getText(1));
-    }
-
-    /**
-     * Tests extraction of XMP Metadata vs. getHeader() data.
-     *
-     * @see https://github.com/smalot/pdfparser/pull/606
-     */
-    public function testExtractXMPMetadata(): void
-    {
-        $document = (new Parser())->parseFile($this->rootDir.'/samples/XMP_Metadata.pdf');
-
-        $details = $document->getDetails();
-
-        // Test that the dc:title data was extracted from the XMP
-        // Metadata.
-        self::assertStringContainsString("Enhance PdfParser\u{2019}s Metadata Capabilities", $details['dc:title']);
-    }
-
-    /**
-     * Tests PDFDocEncoding decode of Document Properties
-     *
-     * @see https://github.com/smalot/pdfparser/issues/609
-     */
-    public function testPDFDocEncodingDecode(): void
-    {
-        $document = (new Parser())->parseFile($this->rootDir.'/samples/Issue609.pdf');
-
-        $details = $document->getDetails();
-
-        // These test that Adobe-inserted \r are removed from a UTF-8
-        // escaped metadata string, and the surrounding characters are
-        // repaired
-        $testKeywords = '˘ˇˆ˙˝˛˞˜•†‡…—–ƒ⁄‹›−‰„“”‘’‚™ﬁﬂŁŒŠŸŽıłœšž€¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ';
-        self::assertStringContainsString($testKeywords, $details['Keywords']);
-
-        $testKeywords = 'added line-feeds often destroy multibyte characters';
-        self::assertStringContainsString($testKeywords, $details['Keywords']);
-
-        // This tests that the PDFDocEncoding characters that differ
-        // from CP-1252 are decoded to their correct UTF-8 code points
-        // as well as removing \r line-feeds
-        $testSubject = '•†‡…—–ƒ⁄‹›−‰„“”‘’‚™ŁŒŠŸŽıłœšž';
-        self::assertStringContainsString($testSubject, $details['Subject']);
-    }
 }
diff --git a/tests/PHPUnit/Integration/FontTest.php b/tests/PHPUnit/Integration/FontTest.php
index e76a051d..35d03284 100644
--- a/tests/PHPUnit/Integration/FontTest.php
+++ b/tests/PHPUnit/Integration/FontTest.php
@@ -367,8 +367,6 @@ public function testDecodeText(): void
      * which would be instance of PDFObject class (but not Encoding or ElementString).
      *
      * @see https://github.com/smalot/pdfparser/pull/500
-     *
-     * @group linux-only
      */
     public function testDecodeTextForFontWithIndirectEncodingWithoutTypeEncoding(): void
     {
@@ -378,14 +376,12 @@ public function testDecodeTextForFontWithIndirectEncodingWithoutTypeEncoding():
         $pages = $document->getPages();
         $page1 = reset($pages);
         $page1Text = $page1->getText();
-        $expectedText = <<<TEXT
-Export\u{a0}transakční\u{a0}historie
-Typ\u{a0}produktu:\u{a0}Podnikatelský\u{a0}účet\u{a0}Maxi
-Číslo\u{a0}účtu:\u{a0}0000000000/0000
-Počáteční\u{a0}zůstatek: 000\u{a0}000,00\u{a0}Kč
-Konečný\u{a0}zůstatek: 000\u{a0}000,00\u{a0}Kč
-Cena\u{a0}za\u{a0}služby
-TEXT;
+        $expectedText = "Export\u{a0}transakční\u{a0}historie\n";
+        $expectedText .= "Typ\u{a0}produktu:\u{a0}Podnikatelský\u{a0}účet\u{a0}Maxi\n";
+        $expectedText .= "Číslo\u{a0}účtu:\u{a0}0000000000/0000\n";
+        $expectedText .= "Počáteční\u{a0}zůstatek:\t000\u{a0}000,00\u{a0}Kč\n";
+        $expectedText .= "Konečný\u{a0}zůstatek:\t000\u{a0}000,00\u{a0}Kč\n";
+        $expectedText .= "Cena\u{a0}za\u{a0}služby";
 
         $this->assertEquals($expectedText, trim($page1Text));
     }
@@ -454,6 +450,21 @@ public function testCalculateTextWidth(): void
         $this->assertEquals([], $missing);
     }
 
+    public function testDecodeContent(): void
+    {
+        /*
+         * Provide a mocked Encoding element to reach the branch that uses the private method "decodeContentByEncodingElement" in Font.php
+         */
+        $encoding = $this->createMock(Element::class);
+        $encoding->method('getContent')->willReturn('WinAnsiEncoding');
+        $header = new Header(['Encoding' => $encoding]);
+
+        $font = new Font($this->createMock(Document::class), $header);
+
+        // Check that a string with UTF-16BE BOM is decoded directly
+        $this->assertEquals('ABC', $font->decodeContent("\xFE\xFF\x00\x41\x00\x42\x00\x43"));
+    }
+
     /**
      * Check behavior if getDetails() does return an array without a Widths-key.
      *
diff --git a/tests/PHPUnit/Integration/PDFObjectTest.php b/tests/PHPUnit/Integration/PDFObjectTest.php
index ab7b229b..9ec68043 100644
--- a/tests/PHPUnit/Integration/PDFObjectTest.php
+++ b/tests/PHPUnit/Integration/PDFObjectTest.php
@@ -52,12 +52,9 @@ protected function getPdfObjectInstance($document): PDFObject
         return new PDFObject($document);
     }
 
-    /**
-     * @group linux-only
-     */
     public function testGetCommandsText(): void
     {
-        $content = "/R14 30 Tf 0.999016 0 0 1 137.4
+        $content = "BT /R14 30 Tf 0.999016 0 0 1 137.4
 342.561 Tm
 [(A)-168.854( BC D)-220.905(\\(E\\))20.905<20>]
 TJ /R14 17.16 Tf <20> Tj
@@ -67,9 +64,20 @@ public function testGetCommandsText(): void
 q -124.774 124.127 5.64213 5.67154 930.307 4436.95 cm
 BI";
 
+        $sections = $this->getPdfObjectInstance(new Document())->getSectionsText($content);
+
         $offset = 0;
-        $parts = $this->getPdfObjectInstance(new Document())->getCommandsText($content, $offset);
+        $parts = [];
+        foreach ($sections as $section) {
+            $parts[] = $this->getPdfObjectInstance(new Document())->getCommandsText($section)[0];
+        }
+
         $reference = [
+            [
+                self::TYPE => '',
+                self::OPERATOR => 'BT',
+                self::COMMAND => '',
+            ],
             [
                 self::TYPE => '/',
                 self::OPERATOR => 'Tf',
@@ -78,7 +86,7 @@ public function testGetCommandsText(): void
             [
                 self::TYPE => '',
                 self::OPERATOR => 'Tm',
-                self::COMMAND => "0.999016 0 0 1 137.4\n342.561",
+                self::COMMAND => '0.999016 0 0 1 137.4 342.561',
             ],
             [
                 self::TYPE => '[',
@@ -86,7 +94,7 @@ public function testGetCommandsText(): void
                 self::COMMAND => [
                     [
                         self::TYPE => '(',
-                        self::OPERATOR => '',
+                        self::OPERATOR => 'TJ',
                         self::COMMAND => 'A',
                     ],
                     [
@@ -96,7 +104,7 @@ public function testGetCommandsText(): void
                     ],
                     [
                         self::TYPE => '(',
-                        self::OPERATOR => '',
+                        self::OPERATOR => 'TJ',
                         self::COMMAND => ' BC D',
                     ],
                     [
@@ -106,7 +114,7 @@ public function testGetCommandsText(): void
                     ],
                     [
                         self::TYPE => '(',
-                        self::OPERATOR => '',
+                        self::OPERATOR => 'TJ',
                         self::COMMAND => '\\(E\\)',
                     ],
                     [
@@ -116,7 +124,7 @@ public function testGetCommandsText(): void
                     ],
                     [
                         self::TYPE => '<',
-                        self::OPERATOR => '',
+                        self::OPERATOR => 'TJ',
                         self::COMMAND => '20',
                     ],
                 ],
@@ -151,10 +159,29 @@ public function testGetCommandsText(): void
                 self::OPERATOR => 'Tf',
                 self::COMMAND => 'R14 20.04',
             ],
+            [
+                self::TYPE => '',
+                self::OPERATOR => 'ET',
+                self::COMMAND => '',
+            ],
+            [
+                self::TYPE => '',
+                self::OPERATOR => 'Q',
+                self::COMMAND => '',
+            ],
+            [
+                self::TYPE => '',
+                self::OPERATOR => 'q',
+                self::COMMAND => '',
+            ],
+            [
+                self::TYPE => '',
+                self::OPERATOR => 'cm',
+                self::COMMAND => '-124.774 124.127 5.64213 5.67154 930.307 4436.95',
+            ],
         ];
 
         $this->assertEquals($parts, $reference);
-        $this->assertEquals(172, $offset);
     }
 
     public function testCleanContent(): void
@@ -202,10 +229,54 @@ public function testCleanContent(): void
         $this->assertEquals($cleaned, $expected);
     }
 
-    /**
-     * @group linux-only
-     */
-    public function testGetSectionText(): void
+    public function testFormatContent(): void
+    {
+        $content = '/Shape <</MCID << /Font<8>>> BT >>BDC Q /CS0 cs 1 1 0  scn 1 i
+/GS0 gs BT /TT0 1 Tf 0.0007 Tc 0.0018 Tw 0  Ts 100  Tz 0 Tr 24 0 0 24 51.3 639.26025 Tm
+(Modificatio[ns] au \\(14\\) septembre 2009 ET 2010)Tj EMC (ABC) Tj
+[ (a)-4.5(b)6(c)8.8 ( fsdfsdfsdf[]sd) ] TJ ET /Shape <</MCID 2 >>BDC q 0.03 841';
+
+        $expected = '/Shape <</MCID << /Font<8>>> BT >>BDC
+Q
+/CS0 cs
+1 1 0 scn
+1 i
+/GS0 gs
+BT
+/TT0 1 Tf
+0.0007 Tc
+0.0018 Tw
+0 Ts
+100 Tz
+0 Tr
+24 0 0 24 51.3 639.26025 Tm
+(Modificatio[ns] au \\(14\\) septembre 2009 ET 2010)Tj
+EMC
+(ABC) Tj
+[ (a)-4.5(b)6(c)8.8 ( fsdfsdfsdf[]sd) ] TJ
+ET
+/Shape <</MCID 2 >>BDC
+q
+0.03 841';
+
+        // Normalize all line-endings in the expected text to \r\n
+        $expected = str_replace(["\r\n", "\n"], ["\n", "\r\n"], $expected);
+
+        $formatContent = new \ReflectionMethod('Smalot\PdfParser\PDFObject', 'formatContent');
+        $formatContent->setAccessible(true);
+        $cleaned = $formatContent->invoke($this->getPdfObjectInstance(new Document()), $content);
+
+        $this->assertEquals($expected, $cleaned);
+
+        // Check that binary data is rejected, i.e. an empty string is returned
+        $content = hex2bin('a670c89d4a324e47');
+
+        $cleaned = $formatContent->invoke($this->getPdfObjectInstance(new Document()), $content);
+
+        $this->assertEquals('', $cleaned);
+    }
+
+    public function testGetSectionsText(): void
     {
         $content = '/Shape <</MCID 1 >>BDC
 Q
@@ -229,16 +300,117 @@ public function testGetSectionText(): void
         $sections = $this->getPdfObjectInstance(new Document())->getSectionsText($content);
 
         $this->assertEquals(
-            ['/TT0 1 Tf
-0.0007 Tc 0.0018 Tw 0  Ts 100  Tz 0 Tr 24 0 0 24 51.3 639.26025 Tm
-(Mod BT atio[ns] au \(14\) septembre 2009 ET 2010)Tj
-EMC
-(ABC) Tj
-
-[ (a)-4.5(b) 6(c)8.8 ( fsdfsdfsdf[ sd) ] TD', '/TT1 1.5 Tf (BT )Tj
-q'],
+            [
+                '/Shape <</MCID 1 >>BDC',
+                'Q',
+                'BT',
+                '/TT0 1 Tf',
+                '0.0007 Tc',
+                '0.0018 Tw',
+                '0 Ts',
+                '100 Tz',
+                '0 Tr',
+                '24 0 0 24 51.3 639.26025 Tm',
+                '(Mod BT atio[ns] au \\(14\\) septembre 2009 ET 2010)Tj',
+                'EMC',
+                '(ABC) Tj',
+                '[ (a)-4.5(b) 6(c)8.8 ( fsdfsdfsdf[ sd) ] TD',
+                'ET',
+                '/Shape <</MCID [BT] >>BDC',
+                'BT',
+                '/TT1 1.5 Tf',
+                '(BT )Tj',
+                'ET',
+                'q',
+            ],
             $sections
         );
+
+        // Test that a Name containing 'ET' doesn't close a 'BT' block
+        // See: https://github.com/smalot/pdfparser/issues/474
+        $content = 'BT
+/FTxkPETkkj 8 Tf
+1 0 0 1 535.55 627.4 Tm
+(Hello World)TJ
+ET';
+
+        $sections = $this->getPdfObjectInstance(new Document())->getSectionsText($content);
+
+        $this->assertNotEquals('/FTxkP', $sections[0]);
+        $this->assertNotEquals('/FTxkP', $sections[1]);
+    }
+
+    public function testParseDictionary(): void
+    {
+        $data = '<</ActualText(text)/XObject<</F2 6 0 R /F3 [/Sub /Array]>> /Array[/Parsed /Data/Actual]/Silent<>>>';
+
+        $dictionary = $this->getPdfObjectInstance(new Document())->parseDictionary($data);
+
+        $this->assertArrayHasKey('ActualText', $dictionary);
+        $this->assertArrayHasKey('XObject', $dictionary);
+        $this->assertArrayHasKey('Array', $dictionary);
+        $this->assertArrayHasKey('Silent', $dictionary);
+
+        $this->assertCount(3, $dictionary['Array']);
+
+        $this->assertEquals('<>', $dictionary['Silent']);
+    }
+
+    /**
+     * Tests that graphics position (cm) is taken into account when
+     * positioning text
+     *
+     * @see https://github.com/smalot/pdfparser/issues/608
+     */
+    public function testGraphicsPositioning(): void
+    {
+        $filename = $this->rootDir.'/samples/bugs/Issue608.pdf';
+
+        $parser = $this->getParserInstance();
+        $document = $parser->parseFile($filename);
+        $pages = $document->getPages();
+
+        // The \n is not added if 'cm' commands are ignored
+        $this->assertStringContainsString("Heading 1 \nLorem ipsum", $pages[0]->getText());
+    }
+
+    /**
+     * Tests that ActualText text is printed for a block instead of the
+     * contents of the Tj or TJ commands in the block.
+     *
+     * @see https://github.com/smalot/pdfparser/issues/464
+     */
+    public function testActualText(): void
+    {
+        $filename = $this->rootDir.'/samples/bugs/Issue608.pdf';
+
+        $parser = $this->getParserInstance();
+        $document = $parser->parseFile($filename);
+        $pages = $document->getPages();
+
+        // An ActualText command substitutes the three literal characters
+        // 'ffi' for the single character ligature here.
+        // In addition, if $last_written_position isn't used to store
+        // the position to insert, \n's would be erroneously inserted
+        // on either side of the 'ffi'
+        $this->assertStringContainsString('efficitur', $pages[0]->getText());
+    }
+
+    /**
+     * Tests for the correct decoding of an Em-dash character in
+     * certain font contexts
+     *
+     * @see https://github.com/smalot/pdfparser/issues/585
+     */
+    public function testDecodeEmDash(): void
+    {
+        $filename = $this->rootDir.'/samples/bugs/Issue585.pdf';
+
+        $parser = $this->getParserInstance();
+        $document = $parser->parseFile($filename);
+        $pages = $document->getPages();
+
+        $this->assertStringContainsString('слева по ходу — веревка', $pages[0]->getText());
     }
 
     /**
@@ -254,7 +426,10 @@ public function testReversedChars(): void
         $document = $parser->parseFile($filename);
         $pages = $document->getPages();
 
-        $this->assertStringContainsString('שלומי טסט', $pages[0]->getText());
+        $pageText = $pages[0]->getText();
+
+        $this->assertStringContainsString('שלומי טסט', $pageText);
+        $this->assertStringContainsString('בנמל מספנות ישראל.', $pageText);
     }
 
     /**
@@ -290,4 +465,23 @@ public function testFontIDWithHyphen(): void
         $this->assertEquals('Tf', $fontCommandHyphen[0]['o']);
         $this->assertEquals('FID-01 15.00', $fontCommandHyphen[0]['c']);
     }
+
+    /**
+     * Tests that an invalid command does not cause an error, but just
+     * returns an empty array
+     */
+    public function testInvalidCommand(): void
+    {
+        $pdfObject = $this->getPdfObjectInstance(new Document());
+
+        $validCommand = $pdfObject->getCommandsText('75 rg');
+
+        $this->assertEquals('', $validCommand[0]['t']);
+        $this->assertEquals('rg', $validCommand[0]['o']);
+        $this->assertEquals('75', $validCommand[0]['c']);
+
+        $invalidCommand = $pdfObject->getCommandsText('75');
+
+        $this->assertEquals([], $invalidCommand);
+    }
 }
diff --git a/tests/PHPUnit/Integration/PageTest.php b/tests/PHPUnit/Integration/PageTest.php
index 7807101e..5e40ee90 100644
--- a/tests/PHPUnit/Integration/PageTest.php
+++ b/tests/PHPUnit/Integration/PageTest.php
@@ -166,7 +166,7 @@ public function testGetTextPullRequest457(): void
         $this->assertStringContainsString('KRANT', $text);
         $this->assertStringContainsString('DINSDAG', $text);
         $this->assertStringContainsString('Snelfilterkoffie', $text);
-        $this->assertStringContainsString('AardappelenZak', $text);
+        $this->assertStringContainsString('Aardappelen'."\n".'Zak', $text);
         $this->assertStringContainsString('ALL', $text);
     }
 
@@ -180,7 +180,7 @@ public function testExtractRawData(): void
         $page = $pages[0];
         $extractedRawData = $page->extractRawData();
 
-        $btItem = $extractedRawData[0];
+        $btItem = $extractedRawData[4];
         $this->assertCount(3, $btItem);
         $this->assertArrayHasKey('t', $btItem);
         $this->assertArrayHasKey('o', $btItem);
@@ -188,9 +188,9 @@ public function testExtractRawData(): void
 
         $this->assertEquals('BT', $btItem['o']);
 
-        $tmItem = $extractedRawData[2];
+        $tmItem = $extractedRawData[6];
 
-        $this->assertcount(174, $extractedRawData);
+        $this->assertcount(185, $extractedRawData);
         $this->assertCount(3, $tmItem);
 
         $this->assertArrayHasKey('t', $tmItem);
@@ -210,8 +210,8 @@ public function testExtractDecodedRawData(): void
         $pages = $document->getPages();
         $page = $pages[0];
         $extractedDecodedRawData = $page->extractDecodedRawData();
-        $tmItem = $extractedDecodedRawData[2];
-        $this->assertCount(174, $extractedDecodedRawData);
+        $tmItem = $extractedDecodedRawData[6];
+        $this->assertCount(185, $extractedDecodedRawData);
         $this->assertCount(3, $tmItem);
 
         $this->assertArrayHasKey('t', $tmItem);
@@ -226,7 +226,7 @@ public function testExtractDecodedRawData(): void
         $this->assertArrayHasKey('o', $tmItem);
         $this->assertArrayHasKey('c', $tmItem);
 
-        $tjItem = $extractedDecodedRawData[3];
+        $tjItem = $extractedDecodedRawData[7];
         $this->assertStringContainsString('TJ', $tjItem['o']);
         $this->assertStringContainsString('(', $tjItem['c'][0]['t']);
         $this->assertStringContainsString('D', $tjItem['c'][0]['c']);
@@ -256,7 +256,7 @@ public function testGetDataCommands(): void
         $pages = $document->getPages();
         $page = $pages[0];
         $dataCommands = $page->getDataCommands();
-        $this->assertCount(174, $dataCommands);
+        $this->assertCount(176, $dataCommands);
 
         $tmItem = $dataCommands[2];
         $this->assertCount(3, $tmItem);
diff --git a/tests/PHPUnit/Integration/ParserTest.php b/tests/PHPUnit/Integration/ParserTest.php
index 2531ba8e..29091914 100644
--- a/tests/PHPUnit/Integration/ParserTest.php
+++ b/tests/PHPUnit/Integration/ParserTest.php
@@ -321,7 +321,7 @@ public function testChangedFontSpaceLimit(): void
         $this->fixture = new Parser([], $config);
         $document = $this->fixture->parseFile($filename);
 
-        $this->assertStringContainsString('dni a  10  maj a  2018', $document->getText());
+        $this->assertStringContainsString('dni a 10 maj a 2018', $document->getText());
     }
 
     /**
@@ -376,7 +376,7 @@ public function testRetainImageContentImpact(): void
         }
 
         $usedMemory = memory_get_usage(true);
-        $this->assertTrue($usedMemory > ($baselineMemory * 1.5), 'Memory is only '.$usedMemory);
+        $this->assertGreaterThan($baselineMemory + 180000000, $usedMemory, 'Memory is only '.$usedMemory);
         $this->assertTrue(null != $document && '' !== $document->getText());
 
         // force garbage collection
@@ -400,7 +400,7 @@ public function testRetainImageContentImpact(): void
          * note: the following memory value is set manually and may differ from system to system.
          *       it must be high enough to not produce a false negative though.
          */
-        $this->assertTrue($usedMemory < ($baselineMemory * 1.05), 'Memory is '.$usedMemory);
+        $this->assertLessThan($baselineMemory * 1.05, $usedMemory, 'Memory is '.$usedMemory);
         $this->assertTrue('' !== $document->getText());
     }
 }