From 7e8b69c8ec7d67f4615eaecb64c001cc8a13003d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Danie=CC=88l=20de=20Kok?= Date: Wed, 31 Aug 2022 14:18:31 +0200 Subject: [PATCH] Sync to changes in PRs on which this one depends --- .../configs/base-config-transition.cfg | 173 ++++++++++++++++++ .../biaffine_parser/configs/base-config.cfg | 2 +- .../biaffine_parser/arc_predicter.pyx | 2 +- 3 files changed, 175 insertions(+), 2 deletions(-) create mode 100644 projects/biaffine_parser/configs/base-config-transition.cfg diff --git a/projects/biaffine_parser/configs/base-config-transition.cfg b/projects/biaffine_parser/configs/base-config-transition.cfg new file mode 100644 index 0000000..0703857 --- /dev/null +++ b/projects/biaffine_parser/configs/base-config-transition.cfg @@ -0,0 +1,173 @@ +[paths] +train = null +dev = null + +[system] +gpu_allocator = "pytorch" +seed = 0 + +[nlp] +lang = null +pipeline = ["transformer","tagger","morphologizer","senter","parser"] +disabled = ["senter"] +before_creation = null +after_creation = null +after_pipeline_creation = null +batch_size = 64 +tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"} + +[components] + +[components.parser] +factory = "parser" +learn_tokens = false +min_action_freq = 30 +moves = null +scorer = {"@scorers":"spacy.parser_scorer.v1"} +update_with_oracle_cut_size = 100 + +[components.parser.model] +@architectures = "spacy.TransitionBasedParser.v2" +state_type = "parser" +extra_state_tokens = false +hidden_width = 64 +maxout_pieces = 2 +use_upper = false +nO = null + +[components.parser.model.tok2vec] +@architectures = "spacy-transformers.TransformerListener.v1" +grad_factor = 1.0 +upstream = "transformer" +pooling = {"@layers":"reduce_mean.v1"} + +[components.transformer] +factory = "transformer" +max_batch_items = 4096 +set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"} + +[components.transformer.model] +@architectures = "spacy-transformers.TransformerModel.v3" +name = null 
+tokenizer_config = {"use_fast": true}
+mixed_precision = true
+
+[components.transformer.model.get_spans]
+@span_getters = "spacy-transformers.strided_spans.v1"
+window = 128
+stride = 96
+
+[components.morphologizer]
+factory = "morphologizer"
+
+[components.morphologizer.model]
+@architectures = "spacy.Tagger.v1"
+nO = null
+
+[components.morphologizer.model.tok2vec]
+@architectures = "spacy-transformers.TransformerListener.v1"
+grad_factor = 1.0
+upstream = "transformer"
+pooling = {"@layers":"reduce_mean.v1"}
+
+[components.senter]
+factory = "senter"
+save_activations = true
+
+[components.senter.model]
+@architectures = "spacy.Tagger.v1"
+nO = null
+
+[components.senter.model.tok2vec]
+@architectures = "spacy-transformers.TransformerListener.v1"
+grad_factor = 1.0
+upstream = "transformer"
+pooling = {"@layers":"reduce_mean.v1"}
+
+[components.tagger]
+factory = "tagger"
+
+[components.tagger.model]
+@architectures = "spacy.Tagger.v1"
+nO = null
+
+[components.tagger.model.tok2vec]
+@architectures = "spacy-transformers.TransformerListener.v1"
+grad_factor = 1.0
+upstream = "transformer"
+pooling = {"@layers":"reduce_mean.v1"}
+
+[corpora]
+
+[corpora.train]
+@readers = "spacy.Corpus.v1"
+path = ${paths.train}
+max_length = 500
+gold_preproc = false
+limit = 0
+augmenter = null
+
+[corpora.dev]
+@readers = "spacy.Corpus.v1"
+max_length = 0
+path = ${paths.dev}
+gold_preproc = false
+limit = 0
+augmenter = null
+
+[training]
+train_corpus = "corpora.train"
+dev_corpus = "corpora.dev"
+seed = ${system:seed}
+gpu_allocator = ${system:gpu_allocator}
+dropout = 0.1
+accumulate_gradient = 3
+patience = 5000
+max_epochs = 0
+max_steps = 20000
+eval_frequency = 1000
+frozen_components = []
+before_to_disk = null
+annotating_components = []
+
+[training.batcher]
+@batchers = "spacy.batch_by_padded.v1"
+discard_oversize = true
+get_length = null
+size = 2000
+buffer = 256
+
+[training.logger]
+@loggers = "spacy.ConsoleLogger.v1"
+progress_bar = true
+
+[training.optimizer] +@optimizers = "Adam.v1" +beta1 = 0.9 +beta2 = 0.999 +L2_is_weight_decay = true +L2 = 0.01 +grad_clip = 1.0 +use_averages = true +eps = 0.00000001 + +[training.optimizer.learn_rate] +@schedules = "warmup_linear.v1" +warmup_steps = 250 +total_steps = 20000 +initial_rate = 0.00005 + +[training.score_weights] +tag_acc = 0.2 +pos_acc = 0.2 +morph_acc = 0.2 +morph_per_feat = null +dep_uas = 0.0 +dep_las = 0.2 +dep_las_per_type = null +sents_p = null +sents_r = null +sents_f = 0.2 + +[initialize] +vectors = null diff --git a/projects/biaffine_parser/configs/base-config.cfg b/projects/biaffine_parser/configs/base-config.cfg index 52e251e..73eda8c 100644 --- a/projects/biaffine_parser/configs/base-config.cfg +++ b/projects/biaffine_parser/configs/base-config.cfg @@ -79,7 +79,7 @@ pooling = {"@layers":"reduce_mean.v1"} [components.senter] factory = "senter" -store_activations = true +save_activations = true [components.senter.model] @architectures = "spacy.Tagger.v1" diff --git a/spacy_experimental/biaffine_parser/arc_predicter.pyx b/spacy_experimental/biaffine_parser/arc_predicter.pyx index 83f6614..bc6a188 100644 --- a/spacy_experimental/biaffine_parser/arc_predicter.pyx +++ b/spacy_experimental/biaffine_parser/arc_predicter.pyx @@ -319,7 +319,7 @@ def split_lazily(docs: List[Doc], *, ops: Ops, max_length: int, senter: Sentence activations = doc.activations.get(senter.name, None) if activations is None: raise ValueError("Greedy splitting requires senter with `store_activations` enabled.") - scores = activations['probs'] + scores = activations['probabilities'] split_recursive(scores[:,1], ops, max_length, lens) assert sum(lens) == sum([len(doc) for doc in docs])