-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 4df6628
Showing
13 changed files
with
307 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
name: CI | ||
on: [push] | ||
jobs: | ||
lint_test_and_build: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v3 | ||
- uses: actions/setup-python@v3 | ||
with: | ||
python-version: "3.7" | ||
- name: Install Poetry | ||
uses: snok/install-poetry@v1 | ||
- name: Install dependencies | ||
run: poetry install --no-interaction | ||
- name: flake8 linting | ||
run: poetry run flake8 . | ||
- name: black code formatting | ||
run: poetry run black . --check | ||
- name: mypy type checking | ||
run: poetry run mypy . | ||
- name: pytest | ||
run: poetry run pytest | ||
- name: build | ||
run: poetry build |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
.coverage | ||
.vscode/ | ||
.idea/ | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
share/python-wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.nox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
*.py,cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
cover/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
db.sqlite3-journal | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
.pybuilder/ | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# IPython | ||
profile_default/ | ||
ipython_config.py | ||
|
||
# pyenv | ||
# For a library or package, you might want to ignore these files since the code is | ||
# intended to run in multiple environments; otherwise, check them in: | ||
.python-version | ||
|
||
# poetry | ||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. | ||
# This is especially recommended for binary packages to ensure reproducibility, and is more | ||
# commonly ignored for libraries. | ||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control | ||
poetry.lock | ||
|
||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow | ||
__pypackages__/ | ||
|
||
# Celery stuff | ||
celerybeat-schedule | ||
celerybeat.pid | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
.dmypy.json | ||
dmypy.json | ||
|
||
# Pyre type checker | ||
.pyre/ | ||
|
||
# pytype static type analyzer | ||
.pytype/ | ||
|
||
# Cython debug symbols | ||
cython_debug/ | ||
|
||
# mac filesystem artifacts | ||
.DS_Store |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Frame Semantic Transformer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
__version__ = '0.1.0' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
from __future__ import annotations | ||
from dataclasses import dataclass | ||
from typing import Any, Iterable, Mapping | ||
|
||
|
||
@dataclass
class SampleSentence:
    """
    One annotated sentence: its text, the trigger span, the frame name,
    and the labeled frame element spans.

    Locations are (start, end) offsets used to slice `text`, so `end` is exclusive.
    """

    text: str
    # (start, end) offsets of the trigger inside `text`
    trigger_loc: tuple[int, int]
    # name of the frame attached to this sentence
    frame: str
    # (start, end, element name) for every frame element span inside `text`
    frame_element_locs: list[tuple[int, int, str]]

    @property
    def trigger(self) -> str:
        """The substring of `text` covered by `trigger_loc`."""
        return self.text[self.trigger_loc[0] : self.trigger_loc[1]]

    @property
    def trigger_labeled_text(self) -> str:
        """`text` with the trigger set off by " * " on both sides."""
        pre_span = self.text[0 : self.trigger_loc[0]]
        post_span = self.text[self.trigger_loc[1] :]
        return f"{pre_span} * {self.trigger} * {post_span}"

    @property
    def frame_elements(self) -> list[tuple[str, str]]:
        """(element name, covered substring) pairs, in `frame_element_locs` order."""
        return [
            (element, self.text[loc_start:loc_end])
            for (loc_start, loc_end, element) in self.frame_element_locs
        ]
|
||
|
||
def parse_samples_from_exemplars(
    exemplars: Iterable[Mapping[str, Any]]
) -> list[SampleSentence]:
    """
    Helper to parse sample sentences out of framenet exemplars, contained in lexical units
    ex: lu = fn.lus()[0]; samples = parse_samples_from_exemplars(lu.exemplars)

    Every entry of each exemplar's "annotationSet" that has "FE", "Target" and "frame"
    keys yields one SampleSentence; entries missing any of those keys are skipped.

    Raises AssertionError if such an entry does not have exactly one "Target" span, or if
    the second item of its "FE" value is not an empty dict.
    """
    sample_sentences: list[SampleSentence] = []
    for exemplar in exemplars:
        for annotation in exemplar["annotationSet"]:
            if not (
                "FE" in annotation and "Target" in annotation and "frame" in annotation
            ):
                continue
            # Only a single trigger span is supported; anything else is rejected loudly.
            assert (
                len(annotation["Target"]) == 1
            ), f"expected exactly 1 target span, got {len(annotation['Target'])}"
            # Only FE[0] (the located spans) is consumed below, so FE[1] must carry nothing.
            # NOTE(review): FE[1] presumably holds non-overt frame elements - confirm in nltk docs.
            assert (
                annotation["FE"][1] == {}
            ), f"expected annotation['FE'][1] to be empty, got {annotation['FE'][1]!r}"
            sample_sentences.append(
                SampleSentence(
                    text=annotation["text"],
                    trigger_loc=annotation["Target"][0],
                    frame=annotation["frame"]["name"],
                    frame_element_locs=annotation["FE"][0],
                )
            )
    return sample_sentences
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
import nltk | ||
|
||
|
||
def ensure_framenet_downloaded() -> None:
    """
    Make sure the nltk "framenet_v17" corpus is available locally.

    The downloader is only invoked when nltk cannot already locate the corpus, so
    repeated calls do not re-run nltk.download every time.
    """
    try:
        nltk.data.find("corpora/framenet_v17")
    except LookupError:
        nltk.download("framenet_v17")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
[tool.poetry] | ||
name = "frame-semantic-transformer" | ||
version = "0.1.0" | ||
description = "" | ||
authors = ["David Chanin <chanindav@gmail.com>"] | ||
|
||
[tool.poetry.dependencies] | ||
python = "^3.7" | ||
nltk = "^3.7" | ||
|
||
[tool.poetry.dev-dependencies] | ||
pytest = "^5.2" | ||
black = "^22.3.0" | ||
mypy = "^0.950" | ||
flake8 = "^4.0.1" | ||
syrupy = "^2.0.0" | ||
|
||
[build-system] | ||
requires = ["poetry-core>=1.0.0"] | ||
build-backend = "poetry.core.masonry.api" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
[flake8] | ||
extend-ignore = E203,E501 | ||
|
||
[mypy] | ||
follow_imports = silent | ||
strict_optional = True | ||
warn_redundant_casts = True | ||
warn_unused_ignores = True | ||
disallow_any_generics = True | ||
check_untyped_defs = True | ||
no_implicit_reexport = True | ||
disallow_untyped_defs = True | ||
namespace_packages = True | ||
|
||
[mypy-tests.*] | ||
ignore_missing_imports = True |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from frame_semantic_transformer.data.framenet_download import ensure_framenet_downloaded | ||
|
||
# Runs at import time so the framenet corpus is available before anything in this
# package uses it. NOTE(review): presumably a test-suite init/conftest module - confirm.
ensure_framenet_downloaded()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# name: test_parse_samples_from_exemplars | ||
list([ | ||
SampleSentence(text='The word is that the usurper , or those acting in his name , have sent out a call for all Scots lords and landed men to repair there , to Annan , to do homage to him . ', trigger_loc=(120, 126), frame='Self_motion', frame_element_locs=[(86, 116, 'Self_mover'), (127, 145, 'Goal'), (146, 165, 'Purpose')]), | ||
SampleSentence(text='The latter were forced to repair to the waiting area where they enviously sipped ginger beer while waiting their turn and watching the old horses pull the machines into the water , one after another . ', trigger_loc=(26, 32), frame='Self_motion', frame_element_locs=[(0, 10, 'Self_mover'), (33, 198, 'Goal')]), | ||
SampleSentence(text='To do the Cafe Posta properly , one repairs to the antique-filled back room ; front parlour strictly for tourists . ', trigger_loc=(36, 43), frame='Self_motion', frame_element_locs=[(0, 29, 'Purpose'), (32, 35, 'Self_mover'), (44, 75, 'Goal')]), | ||
SampleSentence(text='Two of the cast fainted and most of the rest repaired to the nearest bar . ', trigger_loc=(45, 53), frame='Self_motion', frame_element_locs=[(28, 44, 'Self_mover'), (54, 72, 'Goal')]), | ||
SampleSentence(text="Shortly they 'll both repair to the bedroom for another three minute interview break . ", trigger_loc=(22, 28), frame='Self_motion', frame_element_locs=[(0, 7, 'Time'), (8, 21, 'Self_mover'), (29, 43, 'Goal'), (44, 84, 'Purpose')]), | ||
SampleSentence(text='The service over , Joshua repaired to the clinically clean Gents . ', trigger_loc=(26, 34), frame='Self_motion', frame_element_locs=[(0, 16, 'Time'), (19, 25, 'Self_mover'), (35, 64, 'Goal')]), | ||
SampleSentence(text='He repaired to his kitchen area , flagrantly disregarding the woman who was being sick three rows behind the two men . ', trigger_loc=(3, 11), frame='Self_motion', frame_element_locs=[(0, 2, 'Self_mover'), (12, 31, 'Goal')]), | ||
SampleSentence(text='He and Sergeant Joe , having met in Petticoat Lane and having talked , repaired to a pub and had a long and very friendly conversation . ', trigger_loc=(71, 79), frame='Self_motion', frame_element_locs=[(0, 19, 'Self_mover'), (22, 68, 'Time'), (80, 88, 'Goal')]), | ||
SampleSentence(text='Fishermen would repair to the woods to cut hazel and withies to make or mend their pots , then set out in their small boats to their chosen ` grounds " to drop the pots . ', trigger_loc=(16, 22), frame='Self_motion', frame_element_locs=[(0, 9, 'Self_mover'), (23, 35, 'Goal'), (36, 87, 'Purpose')]), | ||
]) | ||
# --- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
from syrupy.assertion import SnapshotAssertion | ||
from nltk.corpus import framenet as fn | ||
|
||
from frame_semantic_transformer.data.SampleSentence import parse_samples_from_exemplars | ||
|
||
|
||
def test_parse_samples_from_exemplars(snapshot: SnapshotAssertion) -> None:
    """Parse the exemplars of one lexical unit and compare against the stored snapshot."""
    lu = fn.lu(6403)  # repair.v

    samples = parse_samples_from_exemplars(lu.exemplars)

    assert len(samples) == 9
    assert (
        samples[0].text
        == "The word is that the usurper , or those acting in his name , have sent out a call for all Scots lords and landed men to repair there , to Annan , to do homage to him . "
    )
    assert samples[0].trigger == "repair"
    assert samples[0].frame_elements == [
        ("Self_mover", "all Scots lords and landed men"),
        ("Goal", "there , to Annan ,"),
        ("Purpose", "to do homage to him"),
    ]
    # The full parsed list is also pinned by the syrupy snapshot.
    assert samples == snapshot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
from frame_semantic_transformer import __version__ | ||
|
||
|
||
def test_version() -> None:
    """The package exposes the expected version string."""
    assert __version__ == "0.1.0"