Skip to content

Commit

Permalink
Cleanup Cython structs (#11337)
Browse files Browse the repository at this point in the history
* cleanup Tokenizer fields

* remove unused object from vocab

* remove IS_OOV_DEPRECATED

* add back in as FLAG13

* use FLAG18 instead

* import fix

* fix clumsy fingers

* revert symbol changes in favor of #11352

* bint instead of bool
  • Loading branch information
svlandeg authored Aug 22, 2022
1 parent d757dec commit 1a5be63
Show file tree
Hide file tree
Showing 5 changed files with 7 additions and 17 deletions.
6 changes: 1 addition & 5 deletions spacy/tokenizer.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,7 @@ cdef class Tokenizer:
cdef object _infix_finditer
cdef object _rules
cdef PhraseMatcher _special_matcher
# TODO convert to bool in v4
cdef int _faster_heuristics
# TODO next one is unused and should be removed in v4
# https://github.com/explosion/spaCy/pull/9150
cdef int _unused_int2
cdef bint _faster_heuristics

cdef Doc _tokenize_affixes(self, str string, bint with_special_cases)
cdef int _apply_special_cases(self, Doc doc) except -1
Expand Down
9 changes: 4 additions & 5 deletions spacy/tokenizer.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,16 @@ from preshed.maps cimport PreshMap
cimport cython

import re
import warnings

from .tokens.doc cimport Doc
from .strings cimport hash_string
from .lexeme cimport EMPTY_LEXEME

from .attrs import intify_attrs
from .symbols import ORTH, NORM
from .errors import Errors, Warnings
from .errors import Errors
from . import util
from .util import registry, get_words_and_spaces
from .util import get_words_and_spaces
from .attrs import intify_attrs
from .symbols import ORTH
from .scorer import Scorer
Expand Down Expand Up @@ -128,10 +127,10 @@ cdef class Tokenizer:

property faster_heuristics:
def __get__(self):
return bool(self._faster_heuristics)
return self._faster_heuristics

def __set__(self, faster_heuristics):
self._faster_heuristics = bool(faster_heuristics)
self._faster_heuristics = faster_heuristics
self._reload_special_cases()

def __reduce__(self):
Expand Down
1 change: 0 additions & 1 deletion spacy/vocab.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ cdef class Vocab:
cdef public object writing_system
cdef public object get_noun_chunks
cdef readonly int length
cdef public object _unused_object # TODO remove in v4, see #9150
cdef public object lex_attr_getters
cdef public object cfg

Expand Down
1 change: 0 additions & 1 deletion spacy/vocab.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ def unpickle_vocab(
sstore: StringStore,
vectors: Any,
morphology: Any,
_unused_object: Any,
lex_attr_getters: Any,
lookups: Any,
get_noun_chunks: Any,
Expand Down
7 changes: 2 additions & 5 deletions spacy/vocab.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -558,21 +558,18 @@ def pickle_vocab(vocab):
sstore = vocab.strings
vectors = vocab.vectors
morph = vocab.morphology
_unused_object = vocab._unused_object
lex_attr_getters = srsly.pickle_dumps(vocab.lex_attr_getters)
lookups = vocab.lookups
get_noun_chunks = vocab.get_noun_chunks
return (unpickle_vocab,
(sstore, vectors, morph, _unused_object, lex_attr_getters, lookups, get_noun_chunks))
(sstore, vectors, morph, lex_attr_getters, lookups, get_noun_chunks))


def unpickle_vocab(sstore, vectors, morphology, _unused_object,
lex_attr_getters, lookups, get_noun_chunks):
def unpickle_vocab(sstore, vectors, morphology, lex_attr_getters, lookups, get_noun_chunks):
cdef Vocab vocab = Vocab()
vocab.vectors = vectors
vocab.strings = sstore
vocab.morphology = morphology
vocab._unused_object = _unused_object
vocab.lex_attr_getters = srsly.pickle_loads(lex_attr_getters)
vocab.lookups = lookups
vocab.get_noun_chunks = get_noun_chunks
Expand Down

0 comments on commit 1a5be63

Please sign in to comment.