diff --git a/para_tri_dataset/paraphrase_dataset/base.py b/para_tri_dataset/paraphrase_dataset/base.py index 9f250a0..c47ffc4 100644 --- a/para_tri_dataset/paraphrase_dataset/base.py +++ b/para_tri_dataset/paraphrase_dataset/base.py @@ -26,7 +26,6 @@ class Phrase(AbstractDataclass): class ParaphraseDataset(abc.ABC): - @classmethod @abc.abstractmethod def from_config(cls, cfg: Config) -> "ParaphraseDataset": diff --git a/para_tri_dataset/paraphrase_dataset/para_phraser_plus/file_dataset.py b/para_tri_dataset/paraphrase_dataset/para_phraser_plus/file_dataset.py index 059699a..f134050 100644 --- a/para_tri_dataset/paraphrase_dataset/para_phraser_plus/file_dataset.py +++ b/para_tri_dataset/paraphrase_dataset/para_phraser_plus/file_dataset.py @@ -27,8 +27,9 @@ class ParaPhraserPlusPhrase(Phrase): text: str -def parse_json_dataset(dataset: SerializedDatasetType)\ - -> Generator[Tuple[ParaPhraserPlusPhrase, Tuple[int, ...]], None, None]: +def parse_json_dataset( + dataset: SerializedDatasetType, +) -> Generator[Tuple[ParaPhraserPlusPhrase, Tuple[int, ...]], None, None]: offset = 0 for serialized_record in dataset.values(): @@ -111,7 +112,7 @@ def get_paraphrases(self, phrase: ParaPhraserPlusPhrase) -> Tuple[ParaPhraserPlu try: paraphrases_ids = self.phrases_relations[phrase.id] except IndexError as err: - raise ValueError(f'not fount phrase by id {phrase.id}') from err + raise ValueError(f"not fount phrase by id {phrase.id}") from err return tuple(self.get_phrase_by_id(p_id) for p_id in paraphrases_ids) diff --git a/para_tri_dataset/paraphrase_dataset/para_phraser_plus/test/test_file_dataset.py b/para_tri_dataset/paraphrase_dataset/para_phraser_plus/test/test_file_dataset.py index 729b0ec..1ed0917 100644 --- a/para_tri_dataset/paraphrase_dataset/para_phraser_plus/test/test_file_dataset.py +++ b/para_tri_dataset/paraphrase_dataset/para_phraser_plus/test/test_file_dataset.py @@ -41,7 +41,10 @@ def dataset(phrase_a, phrase_b) -> ParaPhraserPlusFileDataset: phrase_a, phrase_b, ), - ((1,), (0,),) + ( + (1,), + (0,), + ), ) @@ -75,11 +78,15 @@ def test_iterate_phrases(dataset: ParaPhraserPlusFileDataset, phrase_a, phrase_b assert next(dataset.iterate_phrases(offset=1)) == phrase_b phrases_id = tuple(dataset.iterate_phrases_id()) - assert phrases_id == (0, 1,) + assert phrases_id == ( + 0, + 1, + ) -def test_get_paraphrases(dataset: ParaPhraserPlusFileDataset, phrase_a: ParaPhraserPlusPhrase, - phrase_b: ParaPhraserPlusPhrase): +def test_get_paraphrases( + dataset: ParaPhraserPlusFileDataset, phrase_a: ParaPhraserPlusPhrase, phrase_b: ParaPhraserPlusPhrase +): paraphrases_a = dataset.get_paraphrases(phrase_a) assert len(paraphrases_a) == 1 @@ -98,8 +105,9 @@ def test_get_paraphrases(dataset: ParaPhraserPlusFileDataset, phrase_a: ParaPhra assert paraphrases_id_b[0] == phrase_a.id -def test_get_phrase_by_idx(dataset: ParaPhraserPlusFileDataset, phrase_a: ParaPhraserPlusPhrase, - phrase_b: ParaPhraserPlusPhrase): +def test_get_phrase_by_idx( + dataset: ParaPhraserPlusFileDataset, phrase_a: ParaPhraserPlusPhrase, phrase_b: ParaPhraserPlusPhrase +): assert phrase_a == dataset.get_phrase_by_id(0) assert phrase_b == dataset.get_phrase_by_id(1) diff --git a/para_tri_dataset/phrase_vector_model/base.py b/para_tri_dataset/phrase_vector_model/base.py index 048f8ea..bc6e9e2 100644 --- a/para_tri_dataset/phrase_vector_model/base.py +++ b/para_tri_dataset/phrase_vector_model/base.py @@ -30,7 +30,6 @@ class PhraseNumpyVector(PhraseVector): class PhraseVectorModel(abc.ABC): - @classmethod @abc.abstractmethod def from_config(cls, cfg: Config) -> "PhraseVectorModel":