diff --git a/spacy/tokenizer.pxd b/spacy/tokenizer.pxd
index a902ebad941..f64e0e93413 100644
--- a/spacy/tokenizer.pxd
+++ b/spacy/tokenizer.pxd
@@ -23,11 +23,7 @@ cdef class Tokenizer:
     cdef object _infix_finditer
     cdef object _rules
     cdef PhraseMatcher _special_matcher
-    # TODO convert to bool in v4
-    cdef int _faster_heuristics
-    # TODO next one is unused and should be removed in v4
-    # https://github.com/explosion/spaCy/pull/9150
-    cdef int _unused_int2
+    cdef bint _faster_heuristics
 
     cdef Doc _tokenize_affixes(self, str string, bint with_special_cases)
     cdef int _apply_special_cases(self, Doc doc) except -1
diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index c95392a2026..9b79207f82e 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -8,11 +8,18 @@ from libcpp.set cimport set as stdset
 from preshed.maps cimport PreshMap
 
 import re
+
+from .tokens.doc cimport Doc
+from .strings cimport hash_string
 from .lexeme cimport EMPTY_LEXEME
 from .strings cimport hash_string
 from .tokens.doc cimport Doc
 
+from .attrs import intify_attrs
+from .symbols import ORTH, NORM
+from .errors import Errors
 from . import util
+from .util import get_words_and_spaces
 from .attrs import intify_attrs
 from .errors import Errors
 from .scorer import Scorer
@@ -124,10 +131,10 @@ cdef class Tokenizer:
 
     property faster_heuristics:
         def __get__(self):
-            return bool(self._faster_heuristics)
+            return self._faster_heuristics
 
         def __set__(self, faster_heuristics):
-            self._faster_heuristics = bool(faster_heuristics)
+            self._faster_heuristics = faster_heuristics
             self._reload_special_cases()
 
     def __reduce__(self):
diff --git a/spacy/vocab.pxd b/spacy/vocab.pxd
index 43e47af1dee..b91ce3ab45b 100644
--- a/spacy/vocab.pxd
+++ b/spacy/vocab.pxd
@@ -32,7 +32,6 @@ cdef class Vocab:
     cdef public object writing_system
    cdef public object get_noun_chunks
     cdef readonly int length
-    cdef public object _unused_object  # TODO remove in v4, see #9150
     cdef public object lex_attr_getters
     cdef public object cfg
 
diff --git a/spacy/vocab.pyi b/spacy/vocab.pyi
index b7ff20348a0..7f5f23e7847 100644
--- a/spacy/vocab.pyi
+++ b/spacy/vocab.pyi
@@ -73,7 +73,6 @@ def unpickle_vocab(
     sstore: StringStore,
     vectors: Any,
     morphology: Any,
-    _unused_object: Any,
     lex_attr_getters: Any,
     lookups: Any,
     get_noun_chunks: Any,
diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index c03226e2467..834f21c35dc 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -579,21 +579,18 @@ def pickle_vocab(vocab):
     sstore = vocab.strings
     vectors = vocab.vectors
     morph = vocab.morphology
-    _unused_object = vocab._unused_object
     lex_attr_getters = srsly.pickle_dumps(vocab.lex_attr_getters)
     lookups = vocab.lookups
     get_noun_chunks = vocab.get_noun_chunks
     return (unpickle_vocab,
-            (sstore, vectors, morph, _unused_object, lex_attr_getters, lookups, get_noun_chunks))
+            (sstore, vectors, morph, lex_attr_getters, lookups, get_noun_chunks))
 
 
-def unpickle_vocab(sstore, vectors, morphology, _unused_object,
-                   lex_attr_getters, lookups, get_noun_chunks):
+def unpickle_vocab(sstore, vectors, morphology, lex_attr_getters, lookups, get_noun_chunks):
     cdef Vocab vocab = Vocab()
     vocab.vectors = vectors
     vocab.strings = sstore
     vocab.morphology = morphology
-    vocab._unused_object = _unused_object
     vocab.lex_attr_getters = srsly.pickle_loads(lex_attr_getters)
     vocab.lookups = lookups
     vocab.get_noun_chunks = get_noun_chunks
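
For context on the `faster_heuristics` change: once the field is declared as `bint`, Cython coerces any Python value to a C boolean on assignment and converts it back to a Python `bool` on read, which is why the explicit `bool(...)` calls in the property become redundant. A minimal sketch of the expected behavior, assuming a spaCy install with this patch applied:

```python
# Sketch of the property behavior after the int -> bint conversion
# (assumes spaCy with this patch; spacy.blank is the stock factory).
import spacy

nlp = spacy.blank("en")

# Assignment coerces any truthy/falsy Python value to a C bint...
nlp.tokenizer.faster_heuristics = 0
# ...and reads convert the bint back to a real Python bool.
assert nlp.tokenizer.faster_heuristics is False

nlp.tokenizer.faster_heuristics = "yes"  # truthy -> True
assert nlp.tokenizer.faster_heuristics is True
```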
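
Similarly, dropping `_unused_object` shrinks the tuple that `pickle_vocab` hands to `unpickle_vocab`, so a quick round-trip check is a reasonable way to confirm the two sides stay in sync. A sketch, again assuming this patch is applied:

```python
# Sketch: Vocab pickling round-trips without the _unused_object slot.
import pickle

from spacy.vocab import Vocab

vocab = Vocab()
vocab.strings.add("tokenizer")

# pickle_vocab/unpickle_vocab are registered via __reduce__, so the
# plain pickle module exercises the changed code paths.
restored = pickle.loads(pickle.dumps(vocab))
assert "tokenizer" in restored.strings
```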