Skip to content

Commit

Permalink
Filter equation overlaps
Browse files (browse the repository at this point in the history)
  • Loading branch information
VikParuchuri committed Feb 12, 2025
1 parent 1054a4a commit ebe1024
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 4 deletions.
7 changes: 7 additions & 0 deletions marker/builders/line.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -155,6 +155,13 @@ def get_all_lines(self, document: Document, provider: PdfProvider, do_inline_mat
image_size,
page_size
)
detection_result = self.filter_equation_overlaps(
document,
document_page,
detection_result,
image_size,
page_size
)

# Merge text and inline math detection results
merged_detection_boxes = self.determine_math_lines(text_result=detection_result, inline_result=inline_detection_result)
Expand Down
4 changes: 0 additions & 4 deletions marker/processors/llm/llm_text.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,13 +5,11 @@
from PIL import Image

from marker.processors.llm import BaseLLMSimpleBlockProcessor, PromptData, BlockData
from bs4 import BeautifulSoup

from marker.processors.util import add_math_spans_to_line
from marker.schema import BlockTypes
from marker.schema.blocks import Block
from marker.schema.document import Document
from marker.schema.registry import get_block_class
from marker.schema.text import Line


Expand Down Expand Up @@ -95,8 +93,6 @@ def inference_blocks(self, document: Document) -> List[List[BlockData]]:
out_blocks.append(batch)
return out_blocks



def get_block_lines(self, block: Block, document: Document) -> Tuple[list, list]:
text_lines = block.contained_blocks(document, (BlockTypes.Line,))
extracted_lines = [line.formatted_text(document) for line in text_lines]
Expand Down

0 comments on commit ebe1024

Please sign in to comment.