Add test_vision_model

HazyResearch · HiromuHota · Oct 9, 2020 · Oct 9, 2020 · Oct 9, 2020 · Oct 8, 2020
commit 7cc0f3c13df0e3d56074404a0478df0020f13dec
diff --git a/pdftotree/visual/visual_utils.py b/pdftotree/visual/visual_utils.py
@@ -1,14 +1,18 @@
 import os
+from typing import Tuple
 
 import keras.backend as K
 import numpy as np
 import selectivesearch
 from keras.preprocessing.image import img_to_array, load_img
+from numpy import ndarray
 from wand.color import Color
 from wand.image import Image
 
 
-def predict_heatmap(pdf_path, page_num, model, img_dim=448, img_dir="tmp/img"):
+def predict_heatmap(
+    pdf_path, page_num, model, img_dim=448, img_dir="tmp/img"
+) -> Tuple[ndarray, ndarray]:
     """
     Return an image corresponding to the page of the pdf
     documents saved at pdf_path. If the image is not found in img_dir this

diff --git a/tests/test_basic.py b/tests/test_basic.py
@@ -108,4 +108,5 @@ def test_vision_completion():
         model_type="vision",
         model_path="tests/input/paleo_visual_model.h5",
     )
-    assert output is not None
+    soup = BeautifulSoup(output, "lxml")
+    assert len(soup.find_all("table")) == 2
diff --git a/tests/test_table_detection.py b/tests/test_table_detection.py
@@ -0,0 +1,20 @@
+"""Test table area detection."""
+
+from pdftotree.core import load_model
+from pdftotree.visual.visual_utils import predict_heatmap
+
+
+def test_vision_model():
+    """Check if the vision model runs and returns results in expected format."""
+    pdf_file = "tests/input/paleo.pdf"
+    model_path = "tests/input/paleo_visual_model.h5"
+    model = load_model("vision", model_path)
+    page_num = 0
+    image, pred = predict_heatmap(
+        pdf_file, page_num, model
+    )  # index start at 0 with wand
+    assert image.shape == (448, 448, 3)
+    assert pred.shape == (448, 448)
+
+
+# TODO: add test_ml_model and test_heuristic_model