Skip to content

Commit

Permalink
Merge pull request #7 from flaxandteal/fix/issue-6-WordNetCorpusReader-has-no-att
Browse files Browse the repository at this point in the history

fix(issue#6): Ensured wordnet and stopwords are loaded before they are used.
  • Loading branch information
KamenDimitrov97 authored Aug 21, 2024
2 parents 94773c3 + c50ea36 commit 53f8ce7
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 11 deletions.
8 changes: 4 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@

## STAGE 1 - Core package(s)

FROM ghcr.io/pyo3/maturin:main as maturin
FROM ghcr.io/pyo3/maturin:main AS maturin

RUN mkdir -p /app/build/bonn
WORKDIR /app/build/test_data
# WORKDIR /app/build/test_data
# RUN curl -L -O "...wiki/wiki.en.fifu"
WORKDIR /app/build

Expand All @@ -19,8 +19,8 @@ COPY README.md /app/build

RUN RUSTFLAGS="-L /usr/lib64/atlas -C link-args=-lsatlas -ltatlas -llapack" cargo install finalfusion-utils --features=opq

COPY pyproject.toml /app/build
COPY src /app/build/src
COPY bonn /app/build/bonn
COPY pyproject.toml /app/build
COPY python/bonn /app/build/bonn

WORKDIR /app/build
5 changes: 1 addition & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,11 @@ RESET := $(shell tput -Txterm sgr0)
all: build

.PHONY: build
build: Dockerfile
build:
@mkdir -p $(BUILD)/wheels
docker build -t bonn_py_build -f Dockerfile .
docker run --platform "linux/amd64" --entrypoint maturin -v $(shell pwd)/$(BUILD)/wheels:/app/build/target/wheels bonn_py_build build --find-interpreter

Dockerfile:
m4 Dockerfile.in > Dockerfile

test_data/wiki.en.fifu:
curl -o test_data/wiki.en.fifu http://www.sfs.uni-tuebingen.de/a3-public-data/finalfusion-fasttext/wiki/wiki.en.fifu

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "maturin"

[project]
name = "bonn"
version = "0.1.5"
version = "0.1.6"
description = "Created for ONS. Proof-of-concept mmap'd Rust word2vec implementation linked with category matching"
readme = "README.md"
license = { "file" = "LICENSE.md" }
Expand All @@ -29,5 +29,5 @@ classifiers = [
]

[tool.maturin]
python-source = "python"
python-source = ""
module-name = "bonn._bonn"
4 changes: 3 additions & 1 deletion python/bonn/category_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
import math
import re
from sortedcontainers import SortedDict
from nltk.corpus import stopwords
from nltk.corpus import stopwords, wordnet
from nltk.stem.wordnet import WordNetLemmatizer

from .utils import cosine_similarities
stopwords.ensure_loaded()
wordnet.ensure_loaded()

re_ws = re.compile(r"\s+")
re_num = re.compile(r"[^\w\s\']", flags=re.UNICODE)
Expand Down

0 comments on commit 53f8ce7

Please sign in to comment.