Test conda error #1

Closed
wants to merge 4 commits
60 changes: 60 additions & 0 deletions .github/workflows/test.yaml
@@ -0,0 +1,60 @@
name: Python Package

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        python-version: ["3.10"]
        os: [ubuntu-latest]

    steps:
      - name: Install elan
        run: |
          set -o pipefail
          curl -sSfL https://github.com/leanprover/elan/releases/download/v3.1.1/elan-x86_64-unknown-linux-gnu.tar.gz | tar xz
          ./elan-init -y --default-toolchain none
          echo "$HOME/.elan/bin" >> "$GITHUB_PATH"

      - uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Install Dependencies
        run: |
          pushd experiments/minif2f/MiniF2F/
          lake exe cache get && lake build
          popd

      - name: Cache dependencies
        uses: actions/cache@v3
        with:
          path: |
            ~/.cache/pip
            ~/.cache/pypoetry
          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
          restore-keys: |
            ${{ runner.os }}-poetry-

      - name: Install Poetry
        run: |
          curl -sSL https://install.python-poetry.org | python3 -
          # Append to $GITHUB_PATH rather than writing an `export PATH=...`
          # line to $GITHUB_ENV: $GITHUB_ENV expects NAME=value pairs, so the
          # export form would never put Poetry on PATH for later steps.
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          poetry build
          poetry install
      - name: Test with pytest
        run: |
          poetry run pytest -s tests/
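
Aside (not part of the diff): the final CI step can be reproduced locally from Python via pytest's documented pytest.main entry point. A minimal sketch, assuming Poetry dependencies and the MiniF2F build from the steps above are already in place:

# Mirrors the workflow's `poetry run pytest -s tests/` invocation.
# pytest.main takes the same CLI arguments and returns the exit code.
import sys
import pytest

sys.exit(pytest.main(["-s", "tests/"]))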
File renamed without changes.
38 changes: 19 additions & 19 deletions experiments/minif2f/test.jsonl

Large diffs are not rendered by default.

32 changes: 16 additions & 16 deletions experiments/minif2f/valid.jsonl

Large diffs are not rendered by default.

2,325 changes: 1,229 additions & 1,096 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion pyproject.toml
@@ -17,7 +17,7 @@ python = "^3.10"

 [tool.poetry.build]
 generate-setup-file = false
-script = "build.py"
+script = "custom_build.py"

 [tool.poetry.group.dev.dependencies]
 # Experiment related dependencies here to not clutter the main project dependencies.
@@ -34,6 +34,8 @@ termcolor = "^2.4.0"
 matplotlib = "^3.9.2"
 seaborn = "^0.13.2"
 pandas = "^2.2.3"
+pytest = "^8.3.4"
+loguru = "^0.7.3"

 [tool.poetry.group.doc]
 optional = true
3 changes: 3 additions & 0 deletions tests/__init__.py
@@ -0,0 +1,3 @@
"""Unit test package for Pantograph."""


28 changes: 28 additions & 0 deletions tests/conftest.py
@@ -0,0 +1,28 @@
import pytest
import pandas as pd
from pathlib import Path
from pantograph import Server
from loguru import logger

logger.remove()
LOGGER_FORMAT = "<green>{level}</green> | <lvl>{message}</lvl>"
logger.add(lambda msg: print(msg, end=''), format=LOGGER_FORMAT, colorize=True)

PROJECT_ROOT = Path(__file__).parent.parent

# Experiment files for MiniF2F
MINIF2F_VALID_FILE = PROJECT_ROOT / 'experiments/minif2f/valid.jsonl'
MINIF2F_TEST_FILE = PROJECT_ROOT / 'experiments/minif2f/test.jsonl'
MINIF2F_ROOT = PROJECT_ROOT / 'experiments/minif2f/MiniF2F'

@pytest.fixture
def minif2f_valid():
    return pd.read_json(MINIF2F_VALID_FILE, lines=True)

@pytest.fixture
def minif2f_test():
    return pd.read_json(MINIF2F_TEST_FILE, lines=True)

@pytest.fixture
def minif2f_server():
    server = Server(project_path=MINIF2F_ROOT, imports=['Mathlib', 'Aesop'])
    return server
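
As context for reviewers, here is a minimal sketch of how the Server fixture is consumed, using only the API this PR already exercises (load_sorry, goal_state, messages); the theorem string and the relative project path are hypothetical examples, and running it outside pytest assumes the repository root as the working directory:

# Sketch only: same pantograph.Server API as the fixtures/tests in this PR.
from pathlib import Path
from pantograph import Server

root = Path("experiments/minif2f/MiniF2F")  # hypothetical cwd-relative path
server = Server(project_path=root, imports=['Mathlib', 'Aesop'])

# load_sorry compiles the source and returns one compilation unit per
# command; with a single theorem it is the last (only) unit. The tests
# below index unit 2 because their header contributes two commands.
units = server.load_sorry("theorem demo (p : Prop) (h : p) : p := sorry")
unit = units[-1]
print(unit.goal_state)
print('\n'.join(unit.messages))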
83 changes: 83 additions & 0 deletions tests/test_minif2f.py
@@ -0,0 +1,83 @@
import pandas as pd
from pandas import DataFrame, Series
from pantograph import Server
from tqdm import tqdm
import pytest
from loguru import logger

default_header = """
set_option maxHeartbeats 0
open BigOperators Real Nat Topology Rat
"""

def verify_theorem_loading(server: Server, theorem: str) -> tuple[bool, str]:
    """Helper function to verify theorem loading."""
    try:
        unit = server.load_sorry(f"{default_header}\n{theorem}")[2]
        goal_state, message = unit.goal_state, '\n'.join(unit.messages)
        is_valid = (
            goal_state is not None and
            len(goal_state.goals) == 1 and
            'error' not in message.lower()
        )
        return is_valid, message
    except Exception as e:
        logger.error(f"Exception while loading theorem: {e}")
        return False, str(e)

@pytest.mark.basic
def test_single_case(minif2f_server: Server, minif2f_test: DataFrame):
    """Test loading of a single theorem case."""
    logger.info("Starting single case test")
    test_theorem = minif2f_test.iloc[0].formal_statement
    is_valid, message = verify_theorem_loading(minif2f_server, test_theorem)
    if is_valid:
        logger.success("Single case test passed successfully")
    else:
        logger.error(f"Single case test failed with message: {message}")
    assert is_valid, f"Failed to load theorem: {message}"

@pytest.mark.basic
def test_load_theorem(minif2f_server: Server, minif2f_test: DataFrame, minif2f_valid: DataFrame):
    """Comprehensive test for loading multiple theorems."""
    logger.info("Theorem loading test")
    # Test valid theorems
    logger.info("Testing valid theorems...")
    failed_valid = []
    for i, theorem in tqdm(enumerate(minif2f_valid.formal_statement),
                           desc="Testing valid theorems",
                           total=len(minif2f_valid)):
        is_valid, _ = verify_theorem_loading(minif2f_server, theorem)
        if not is_valid:
            failed_valid.append(i)
    # Test test theorems
    logger.info("Testing test theorems...")
    failed_test = []
    for i, theorem in tqdm(enumerate(minif2f_test.formal_statement),
                           desc="Testing test theorems",
                           total=len(minif2f_test)):
        is_valid, _ = verify_theorem_loading(minif2f_server, theorem)
        if not is_valid:
            failed_test.append(i)

    # Report results
    total_valid = len(minif2f_valid)
    total_test = len(minif2f_test)
    failed_valid_count = len(failed_valid)
    failed_test_count = len(failed_test)
    logger.info(f"""
    Test Summary:
    Valid theorems: {total_valid - failed_valid_count}/{total_valid} passed
    Test theorems: {total_test - failed_test_count}/{total_test} passed
    """)
    # Detailed failure report
    if failed_valid:
        logger.error(f"Failed valid theorems: {failed_valid}")
    if failed_test:
        logger.error(f"Failed test theorems: {failed_test}")
    assert not failed_valid, f"{failed_valid_count} valid theorems failed"
    assert not failed_test, f"{failed_test_count} test theorems failed"

@pytest.mark.advance
def test_advance_cases():
    pass
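
One gap worth flagging: the basic and advance marks used above are not registered anywhere in this diff, so pytest will emit PytestUnknownMarkWarning for them. A sketch of registering them in tests/conftest.py (the mark names come from this PR; the descriptions are assumptions):

# Sketch for tests/conftest.py: register the custom marks this module uses.
def pytest_configure(config):
    config.addinivalue_line("markers", "basic: fast theorem-loading smoke tests")
    config.addinivalue_line("markers", "advance: slower end-to-end cases")

With that in place, `poetry run pytest -m basic tests/` selects just the two basic tests.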