Test conda error #1

Closed
wants to merge 4 commits
60 changes: 60 additions & 0 deletions .github/workflows/test.yaml
@@ -0,0 +1,60 @@
name: Python Package

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        python-version: ["3.10"]
        os: [ubuntu-latest]

    steps:
      - name: Install elan
        run: |
          set -o pipefail
          curl -sSfL https://github.com/leanprover/elan/releases/download/v3.1.1/elan-x86_64-unknown-linux-gnu.tar.gz | tar xz
          ./elan-init -y --default-toolchain none
          echo "$HOME/.elan/bin" >> "$GITHUB_PATH"

      - uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Install Dependencies
        run: |
          pushd experiments/minif2f/MiniF2F/
          lake exe cache get && lake build
          popd

      - name: Cache dependencies
        uses: actions/cache@v3
        with:
          path: |
            ~/.cache/pip
            ~/.cache/pypoetry
          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
          restore-keys: |
            ${{ runner.os }}-poetry-

      - name: Install Poetry
        run: |
          curl -sSL https://install.python-poetry.org | python3 -
          # Append to $GITHUB_PATH rather than writing an `export PATH=...`
          # line to $GITHUB_ENV: $GITHUB_ENV expects NAME=value pairs, so the
          # export form would never put Poetry on PATH for later steps.
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          poetry build
          poetry install
      - name: Test with pytest
        run: |
          poetry run pytest -s tests/
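
Aside (not part of the diff): the final CI step can be reproduced locally from Python via pytest's documented pytest.main entry point. A minimal sketch, assuming Poetry dependencies and the MiniF2F build from the steps above are already in place:

# Mirrors the workflow's `poetry run pytest -s tests/` invocation.
# pytest.main takes the same CLI arguments and returns the exit code.
import sys
import pytest

sys.exit(pytest.main(["-s", "tests/"]))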
File renamed without changes.
38 changes: 19 additions & 19 deletions experiments/minif2f/test.jsonl

Large diffs are not rendered by default.

32 changes: 16 additions & 16 deletions experiments/minif2f/valid.jsonl

Large diffs are not rendered by default.

2,325 changes: 1,229 additions & 1,096 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion pyproject.toml
@@ -17,7 +17,7 @@ python = "^3.10"

 [tool.poetry.build]
 generate-setup-file = false
-script = "build.py"
+script = "custom_build.py"

 [tool.poetry.group.dev.dependencies]
 # Experiment related dependencies here to not clutter the main project dependencies.
@@ -34,6 +34,8 @@ termcolor = "^2.4.0"
 matplotlib = "^3.9.2"
 seaborn = "^0.13.2"
 pandas = "^2.2.3"
+pytest = "^8.3.4"
+loguru = "^0.7.3"

 [tool.poetry.group.doc]
 optional = true
3 changes: 3 additions & 0 deletions tests/__init__.py
@@ -0,0 +1,3 @@
"""Unit test package for Pantograph."""


28 changes: 28 additions & 0 deletions tests/conftest.py
@@ -0,0 +1,28 @@
import pytest
import pandas as pd
from pathlib import Path
from pantograph import Server
from loguru import logger

logger.remove()
LOGGER_FORMAT = "<green>{level}</green> | <lvl>{message}</lvl>"
logger.add(lambda msg: print(msg, end=''), format=LOGGER_FORMAT, colorize=True)

PROJECT_ROOT = Path(__file__).parent.parent

# Experiment files for MiniF2F
MINIF2F_VALID_FILE = PROJECT_ROOT / 'experiments/minif2f/valid.jsonl'
MINIF2F_TEST_FILE = PROJECT_ROOT / 'experiments/minif2f/test.jsonl'
MINIF2F_ROOT = PROJECT_ROOT / 'experiments/minif2f/MiniF2F'

@pytest.fixture
def minif2f_valid():
    return pd.read_json(MINIF2F_VALID_FILE, lines=True)

@pytest.fixture
def minif2f_test():
    return pd.read_json(MINIF2F_TEST_FILE, lines=True)

@pytest.fixture
def minif2f_server():
    server = Server(project_path=MINIF2F_ROOT, imports=['Mathlib', 'Aesop'])
    return server
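
As context for reviewers, here is a minimal sketch of how the Server fixture is consumed, using only the API this PR already exercises (load_sorry, goal_state, messages); the theorem string and the relative project path are hypothetical examples, and running it outside pytest assumes the repository root as the working directory:

# Sketch only: same pantograph.Server API as the fixtures/tests in this PR.
from pathlib import Path
from pantograph import Server

root = Path("experiments/minif2f/MiniF2F")  # hypothetical cwd-relative path
server = Server(project_path=root, imports=['Mathlib', 'Aesop'])

# load_sorry compiles the source and returns one compilation unit per
# command; with a single theorem it is the last (only) unit. The tests
# below index unit 2 because their header contributes two commands.
units = server.load_sorry("theorem demo (p : Prop) (h : p) : p := sorry")
unit = units[-1]
print(unit.goal_state)
print('\n'.join(unit.messages))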
83 changes: 83 additions & 0 deletions tests/test_minif2f.py
@@ -0,0 +1,83 @@
import pandas as pd
from pandas import DataFrame, Series
from pantograph import Server
from tqdm import tqdm
import pytest
from loguru import logger

default_header = """
set_option maxHeartbeats 0
open BigOperators Real Nat Topology Rat
"""

def verify_theorem_loading(server: Server, theorem: str) -> tuple[bool, str]:
    """Helper function to verify theorem loading."""
    try:
        unit = server.load_sorry(f"{default_header}\n{theorem}")[2]
        goal_state, message = unit.goal_state, '\n'.join(unit.messages)
        is_valid = (
            goal_state is not None and
            len(goal_state.goals) == 1 and
            'error' not in message.lower()
        )
        return is_valid, message
    except Exception as e:
        logger.error(f"Exception while loading theorem: {e}")
        return False, str(e)

@pytest.mark.basic
def test_single_case(minif2f_server: Server, minif2f_test: DataFrame):
    """Test loading of a single theorem case."""
    logger.info("Starting single case test")
    test_theorem = minif2f_test.iloc[0].formal_statement
    is_valid, message = verify_theorem_loading(minif2f_server, test_theorem)
    if is_valid:
        logger.success("Single case test passed successfully")
    else:
        logger.error(f"Single case test failed with message: {message}")
    assert is_valid, f"Failed to load theorem: {message}"

@pytest.mark.basic
def test_load_theorem(minif2f_server: Server, minif2f_test: DataFrame, minif2f_valid: DataFrame):
    """Comprehensive test for loading multiple theorems."""
    logger.info("Theorem loading test")
    # Test valid theorems
    logger.info("Testing valid theorems...")
    failed_valid = []
    for i, theorem in tqdm(enumerate(minif2f_valid.formal_statement),
                           desc="Testing valid theorems",
                           total=len(minif2f_valid)):
        is_valid, _ = verify_theorem_loading(minif2f_server, theorem)
        if not is_valid:
            failed_valid.append(i)
    # Test test theorems
    logger.info("Testing test theorems...")
    failed_test = []
    for i, theorem in tqdm(enumerate(minif2f_test.formal_statement),
                           desc="Testing test theorems",
                           total=len(minif2f_test)):
        is_valid, _ = verify_theorem_loading(minif2f_server, theorem)
        if not is_valid:
            failed_test.append(i)

    # Report results
    total_valid = len(minif2f_valid)
    total_test = len(minif2f_test)
    failed_valid_count = len(failed_valid)
    failed_test_count = len(failed_test)
    logger.info(f"""
    Test Summary:
    Valid theorems: {total_valid - failed_valid_count}/{total_valid} passed
    Test theorems: {total_test - failed_test_count}/{total_test} passed
    """)
    # Detailed failure report
    if failed_valid:
        logger.error(f"Failed valid theorems: {failed_valid}")
    if failed_test:
        logger.error(f"Failed test theorems: {failed_test}")
    assert not failed_valid, f"{failed_valid_count} valid theorems failed"
    assert not failed_test, f"{failed_test_count} test theorems failed"

@pytest.mark.advance
def test_advance_cases():
    pass
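
One gap worth flagging: the basic and advance marks used above are not registered anywhere in this diff, so pytest will emit PytestUnknownMarkWarning for them. A sketch of registering them in tests/conftest.py (the mark names come from this PR; the descriptions are assumptions):

# Sketch for tests/conftest.py: register the custom marks this module uses.
def pytest_configure(config):
    config.addinivalue_line("markers", "basic: fast theorem-loading smoke tests")
    config.addinivalue_line("markers", "advance: slower end-to-end cases")

With that in place, `poetry run pytest -m basic tests/` selects just the two basic tests.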