Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
chanind committed Apr 30, 2022
0 parents commit 4df6628
Show file tree
Hide file tree
Showing 13 changed files with 307 additions and 0 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Continuous integration workflow: lint, type-check, test and build the package.
name: CI
# Runs on every push.
on: [push]
jobs:
# Single job; each step below must succeed for the run to pass.
lint_test_and_build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
with:
# Same as the lower bound of the `python = "^3.7"` constraint in pyproject.toml.
python-version: "3.7"
- name: Install Poetry
uses: snok/install-poetry@v1
# NOTE(review): poetry.lock is listed in .gitignore, so dependency versions
# are presumably resolved fresh on every run rather than pinned.
- name: Install dependencies
run: poetry install --no-interaction
# Static checks: style (flake8), formatting (black, check-only) and typing (mypy).
- name: flake8 linting
run: poetry run flake8 .
- name: black code formatting
run: poetry run black . --check
- name: mypy type checking
run: poetry run mypy .
- name: pytest
run: poetry run pytest
# Verifies that the package can be built.
- name: build
run: poetry build
144 changes: 144 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
.coverage
.vscode/
.idea/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
.python-version

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
poetry.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# mac filesystem artifacts
.DS_Store
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Frame Semantic Transformer
1 change: 1 addition & 0 deletions frame_semantic_transformer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Package version; mirrors `version` in pyproject.toml and is asserted in the tests.
# Double quotes keep the file compliant with the `black . --check` CI step.
__version__ = "0.1.0"
52 changes: 52 additions & 0 deletions frame_semantic_transformer/data/SampleSentence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Iterable, Mapping


@dataclass
class SampleSentence:
    """
    One annotated sentence: the raw text, the (start, end) span of the trigger,
    the frame name, and the (start, end, element name) spans of the frame elements.
    Spans are used as Python slice bounds into `text`, so `end` is exclusive.
    """

    text: str
    trigger_loc: tuple[int, int]
    frame: str
    frame_element_locs: list[tuple[int, int, str]]

    @property
    def trigger(self) -> str:
        """The substring of `text` covered by `trigger_loc`."""
        start = self.trigger_loc[0]
        end = self.trigger_loc[1]
        return self.text[start:end]

    @property
    def trigger_labeled_text(self) -> str:
        """`text` with the trigger surrounded by " * " markers on both sides."""
        before = self.text[: self.trigger_loc[0]]
        after = self.text[self.trigger_loc[1] :]
        return f"{before} * {self.trigger} * {after}"

    @property
    def frame_elements(self) -> list[tuple[str, str]]:
        """(element name, covered text) pairs, in the order of `frame_element_locs`."""
        pairs: list[tuple[str, str]] = []
        for start, end, name in self.frame_element_locs:
            pairs.append((name, self.text[start:end]))
        return pairs


def parse_samples_from_exemplars(
    exemplars: Iterable[Mapping[str, Any]]
) -> list[SampleSentence]:
    """
    Build SampleSentence objects from framenet exemplars (as found on lexical units).
    Only annotations carrying all of "FE", "Target" and "frame" are converted;
    every other annotation is skipped.
    ex: lu = fn.lus()[0]; samples = parse_samples_from_exemplars(lu.exemplars)
    """
    required_keys = ("FE", "Target", "frame")
    parsed: list[SampleSentence] = []
    for exemplar in exemplars:
        for annotation in exemplar["annotationSet"]:
            if not all(key in annotation for key in required_keys):
                continue

            # Exactly one trigger span is expected per annotation
            targets = annotation["Target"]
            assert len(targets) == 1

            # "FE" is indexed as a pair: [0] holds the element spans, [1] must be empty
            fe_layers = annotation["FE"]
            assert fe_layers[1] == {}

            sample = SampleSentence(
                text=annotation["text"],
                trigger_loc=targets[0],
                frame=annotation["frame"]["name"],
                frame_element_locs=fe_layers[0],
            )
            parsed.append(sample)
    return parsed
5 changes: 5 additions & 0 deletions frame_semantic_transformer/data/framenet_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import nltk


def ensure_framenet_downloaded() -> None:
    """
    Make sure the "framenet_v17" corpus is available to nltk, downloading it only if
    it cannot be found locally.
    """
    try:
        # Local lookup first, so repeated calls (e.g. every test run) do not
        # contact the nltk download server when the corpus is already installed
        nltk.data.find("corpora/framenet_v17")
    except LookupError:
        nltk.download("framenet_v17")
20 changes: 20 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Poetry package metadata.
[tool.poetry]
name = "frame-semantic-transformer"
# Mirrored by __version__ in frame_semantic_transformer/__init__.py.
version = "0.1.0"
description = ""
authors = ["David Chanin <chanindav@gmail.com>"]

# Runtime requirements.
[tool.poetry.dependencies]
python = "^3.7"
# Used to download the FrameNet corpus (framenet_v17).
nltk = "^3.7"

# Development tooling; each of these is invoked by the CI workflow,
# except syrupy, which the tests import for snapshot assertions.
[tool.poetry.dev-dependencies]
pytest = "^5.2"
black = "^22.3.0"
mypy = "^0.950"
flake8 = "^4.0.1"
syrupy = "^2.0.0"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
16 changes: 16 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# flake8 settings. E203 (whitespace before ':') and E501 (line too long) are
# ignored; the code uses black-style slices such as `text[a : b]` and long
# string literals that would otherwise be flagged.
[flake8]
extend-ignore = E203,E501

# mypy settings: untyped function definitions and bare generics are rejected.
[mypy]
follow_imports = silent
strict_optional = True
warn_redundant_casts = True
warn_unused_ignores = True
disallow_any_generics = True
check_untyped_defs = True
no_implicit_reexport = True
# Every function, including tests, needs full annotations (e.g. `-> None`).
disallow_untyped_defs = True
namespace_packages = True

# Test modules may import packages for which mypy cannot find type information.
[mypy-tests.*]
ignore_missing_imports = True
Empty file added tests/__init__.py
Empty file.
3 changes: 3 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from frame_semantic_transformer.data.framenet_download import ensure_framenet_downloaded

# Module-level call: runs as soon as this conftest module is imported, so the
# FrameNet corpus is fetched up front rather than inside individual tests.
ensure_framenet_downloaded()
13 changes: 13 additions & 0 deletions tests/data/__snapshots__/test_SampleSentence.ambr
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# name: test_parse_samples_from_exemplars
list([
SampleSentence(text='The word is that the usurper , or those acting in his name , have sent out a call for all Scots lords and landed men to repair there , to Annan , to do homage to him . ', trigger_loc=(120, 126), frame='Self_motion', frame_element_locs=[(86, 116, 'Self_mover'), (127, 145, 'Goal'), (146, 165, 'Purpose')]),
SampleSentence(text='The latter were forced to repair to the waiting area where they enviously sipped ginger beer while waiting their turn and watching the old horses pull the machines into the water , one after another . ', trigger_loc=(26, 32), frame='Self_motion', frame_element_locs=[(0, 10, 'Self_mover'), (33, 198, 'Goal')]),
SampleSentence(text='To do the Cafe Posta properly , one repairs to the antique-filled back room ; front parlour strictly for tourists . ', trigger_loc=(36, 43), frame='Self_motion', frame_element_locs=[(0, 29, 'Purpose'), (32, 35, 'Self_mover'), (44, 75, 'Goal')]),
SampleSentence(text='Two of the cast fainted and most of the rest repaired to the nearest bar . ', trigger_loc=(45, 53), frame='Self_motion', frame_element_locs=[(28, 44, 'Self_mover'), (54, 72, 'Goal')]),
SampleSentence(text="Shortly they 'll both repair to the bedroom for another three minute interview break . ", trigger_loc=(22, 28), frame='Self_motion', frame_element_locs=[(0, 7, 'Time'), (8, 21, 'Self_mover'), (29, 43, 'Goal'), (44, 84, 'Purpose')]),
SampleSentence(text='The service over , Joshua repaired to the clinically clean Gents . ', trigger_loc=(26, 34), frame='Self_motion', frame_element_locs=[(0, 16, 'Time'), (19, 25, 'Self_mover'), (35, 64, 'Goal')]),
SampleSentence(text='He repaired to his kitchen area , flagrantly disregarding the woman who was being sick three rows behind the two men . ', trigger_loc=(3, 11), frame='Self_motion', frame_element_locs=[(0, 2, 'Self_mover'), (12, 31, 'Goal')]),
SampleSentence(text='He and Sergeant Joe , having met in Petticoat Lane and having talked , repaired to a pub and had a long and very friendly conversation . ', trigger_loc=(71, 79), frame='Self_motion', frame_element_locs=[(0, 19, 'Self_mover'), (22, 68, 'Time'), (80, 88, 'Goal')]),
SampleSentence(text='Fishermen would repair to the woods to cut hazel and withies to make or mend their pots , then set out in their small boats to their chosen ` grounds " to drop the pots . ', trigger_loc=(16, 22), frame='Self_motion', frame_element_locs=[(0, 9, 'Self_mover'), (23, 35, 'Goal'), (36, 87, 'Purpose')]),
])
# ---
23 changes: 23 additions & 0 deletions tests/data/test_SampleSentence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from syrupy.assertion import SnapshotAssertion
from nltk.corpus import framenet as fn

from frame_semantic_transformer.data.SampleSentence import parse_samples_from_exemplars


def test_parse_samples_from_exemplars(snapshot: SnapshotAssertion) -> None:
    """
    Parse the exemplars of one lexical unit and check the first sample in detail,
    then compare the full result against the stored snapshot.
    """
    repair_lu = fn.lu(6403)  # repair.v
    expected_first_text = "The word is that the usurper , or those acting in his name , have sent out a call for all Scots lords and landed men to repair there , to Annan , to do homage to him . "
    expected_first_elements = [
        ("Self_mover", "all Scots lords and landed men"),
        ("Goal", "there , to Annan ,"),
        ("Purpose", "to do homage to him"),
    ]

    samples = parse_samples_from_exemplars(repair_lu.exemplars)

    assert len(samples) == 9
    first = samples[0]
    assert first.text == expected_first_text
    assert first.trigger == "repair"
    assert first.frame_elements == expected_first_elements
    assert samples == snapshot
5 changes: 5 additions & 0 deletions tests/test_frame_semantic_transformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from frame_semantic_transformer import __version__


def test_version() -> None:
    """The package exposes the expected version string."""
    # `-> None` is required by mypy's disallow_untyped_defs; double quotes match black.
    assert __version__ == "0.1.0"

0 comments on commit 4df6628

Please sign in to comment.