bugs with multiple NERAnnotators per process space #675

Merged · 6 commits · Aug 3, 2018

Changes from 3 commits

9 changes: 9 additions & 0 deletions ner/config/log4j.properties
@@ -0,0 +1,9 @@
# Set root logger level to INFO and its only appender to A1.
log4j.rootLogger=INFO, A1

# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender

# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
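
For illustration only (not part of the diff), here is a minimal sketch of how this configuration is picked up. The properties path in the call below is an assumption, and the module's slf4j loggers will route to this appender only if the log4j binding is on the classpath.

// Minimal sketch, not part of this PR. Configures log4j 1.x from the new file and logs
// one line; with the ConversionPattern above the output looks roughly like
// "1523 [main] INFO  some.package.Log4jConfigSketch  - loading models".
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;

public class Log4jConfigSketch {
    private static final Logger logger = Logger.getLogger(Log4jConfigSketch.class);

    public static void main(String[] args) {
        // Path is assumed to be resolved relative to the ner module root.
        PropertyConfigurator.configure("ner/config/log4j.properties");
        logger.info("loading models");
    }
}
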
ner/src/main/java/edu/illinois/cs/cogcomp/ner/LbjTagger/LearningCurveMultiDataset.java
@@ -12,7 +12,6 @@
import edu.illinois.cs.cogcomp.lbjava.learn.BatchTrainer;
import edu.illinois.cs.cogcomp.lbjava.learn.SparseAveragedPerceptron;
import edu.illinois.cs.cogcomp.lbjava.learn.SparseNetworkLearner;
import edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.SparseNetworkOptimizer;
import edu.illinois.cs.cogcomp.lbjava.parse.Parser;
import edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.ExpressiveFeaturesAnnotator;
import edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.TwoLayerPredictionAggregationFeatures;
@@ -26,6 +25,11 @@
import java.io.IOException;
import java.util.Vector;

/**
 * Build the model, test it out, report results and so on.
* @author nick
* @author redman
*/
public class LearningCurveMultiDataset {

private static final String NAME = LearningCurveMultiDataset.class.getCanonicalName();
@@ -42,7 +46,7 @@ public class LearningCurveMultiDataset {
* @param devDataPath data used to auto-converge.
*/
public static void buildFinalModel(int fixedNumIterations, String trainDataPath,
String testDataPath, String devDataPath) throws Exception {
String testDataPath, String devDataPath, boolean incremental) throws Exception {
Data trainData = new Data(trainDataPath, trainDataPath, "-c", new String[] {}, new String[] {});
ExpressiveFeaturesAnnotator.annotate(trainData);
Data testData = new Data(testDataPath, testDataPath, "-c", new String[] {}, new String[] {});
@@ -56,7 +60,7 @@ public static void buildFinalModel(int fixedNumIterations, String trainDataPath,
test.addElement(devData);
logger.debug("Building final model: iterations = " + fixedNumIterations + " train = '"
+ trainDataPath + "' test = '"+testDataPath+"' dev = '" + testDataPath+"'");
getLearningCurve(train, test, fixedNumIterations);
getLearningCurve(train, test, fixedNumIterations, incremental);
}

/**
@@ -65,7 +69,7 @@ public static void buildFinalModel(int fixedNumIterations, String trainDataPath,
* use fixedNumIterations=-1 if you want to use the automatic convergence criterion
*/
public static void getLearningCurve(int fixedNumIterations, String trainDataPath,
String testDataPath) throws Exception {
String testDataPath, boolean incremental) throws Exception {
logger.debug("getLearningCurve(): fni = " + fixedNumIterations + " trainDataPath = '"
+ trainDataPath + "' testDataPath = '" + testDataPath + "'....");
Data trainData =
@@ -78,16 +82,18 @@ public static void getLearningCurve(int fixedNumIterations, String trainDataPath
train.addElement(trainData);
Vector<Data> test = new Vector<>();
test.addElement(testData);
getLearningCurve(train, test, fixedNumIterations);
getLearningCurve(train, test, fixedNumIterations, incremental);
}

/**
* use fixedNumIterations=-1 if you want to use the automatic convergence criterion
* use fixedNumIterations=-1 if you want to use the automatic convergence criterion; if incremental is
* true, training will start with the existing model's weights and continue training from that set of
* weights.
* <p>
* NB: assuming column format
*/
public static void getLearningCurve(Vector<Data> trainDataSet, Vector<Data> testDataSet,
int fixedNumIterations) throws Exception {
int fixedNumIterations, boolean incremental) throws Exception {
double bestF1Level1 = -1;
int bestRoundLevel1 = 0;
// Get the directory name (<configname>.model is appended in LbjTagger/Parameters.java:139)
@@ -114,9 +120,15 @@ public static void getLearningCurve(Vector<Data> trainDataSet, Vector<Data> test
paramLevel1.baseLTU.featurePruningThreshold = ParametersForLbjCode.currentParameters.featurePruningThreshold;
logger.info("Level 1 classifier learning rate = "+ParametersForLbjCode.currentParameters.learningRatePredictionsLevel1+
", thickness = "+ParametersForLbjCode.currentParameters.thicknessPredictionsLevel1);

NETaggerLevel1 tagger1 =
new NETaggerLevel1(paramLevel1, modelPath + ".level1", modelPath + ".level1.lex");
tagger1.forget();
if (!incremental) {
logger.info("Training L1 model from scratch.");
tagger1.forget();
} else {
logger.info("Training L1 model incrementally.");
}
ParametersForLbjCode.currentParameters.taggerLevel1 = tagger1;
for (int dataId = 0; dataId < trainDataSet.size(); dataId++) {
Data trainData = trainDataSet.elementAt(dataId);
@@ -192,7 +204,12 @@ public static void getLearningCurve(Vector<Data> trainDataSet, Vector<Data> test
new NETaggerLevel2(paramLevel2, ParametersForLbjCode.currentParameters.pathToModelFile
+ ".level2", ParametersForLbjCode.currentParameters.pathToModelFile
+ ".level2.lex");
tagger2.forget();
if (!incremental) {
logger.info("Training L2 model from scratch.");
tagger2.forget();
} else {
logger.info("Training L2 model incrementally.");
}
ParametersForLbjCode.currentParameters.taggerLevel2 = tagger2;

// Previously checked if PatternFeatures was in featuresToUse.
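For context, a rough sketch of how the new incremental parameter is used. It is modeled on NerTagger's -train branch further down in this diff; the config and data paths are placeholders.

// Sketch only, not part of this PR: paths are placeholders and the config must name the model files.
boolean areWeTraining = true;
Parameters.readConfigAndLoadExternalData("config/conll.config", areWeTraining);

// fixedNumIterations = -1 auto-converges on the dev set; incremental = true keeps the
// existing weights and continues training instead of calling forget() on both taggers.
LearningCurveMultiDataset.getLearningCurve(-1, "data/train", "data/dev", true);
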
20 changes: 12 additions & 8 deletions ner/src/main/java/edu/illinois/cs/cogcomp/ner/Main.java
@@ -248,14 +248,18 @@ protected void inputMenu() {
System.out.println("Loading resources...");
if (resourceManager == null)
this.resourceManager = new NerBaseConfigurator().getDefaultConfig();

String modelName = this.resourceManager.getString(NerBaseConfigurator.MODEL_NAME);
if (modelName.toLowerCase().equals("conll"))
this.nerAnnotator = new NERAnnotator(this.resourceManager, ViewNames.NER_CONLL);
else if (modelName.toLowerCase().equals("ontonotes"))
this.nerAnnotator = new NERAnnotator(this.resourceManager, ViewNames.NER_ONTONOTES);
else
this.nerAnnotator = new NERAnnotator(this.resourceManager, "NER_OTHER");
String viewName = this.resourceManager.getString(NerBaseConfigurator.VIEW_NAME);
if (viewName == null) {
String modelName = this.resourceManager.getString(NerBaseConfigurator.MODEL_NAME);
if (modelName.toLowerCase().equals("conll"))
this.nerAnnotator = new NERAnnotator(this.resourceManager, ViewNames.NER_CONLL);
else if (modelName.toLowerCase().equals("ontonotes"))
this.nerAnnotator = new NERAnnotator(this.resourceManager, ViewNames.NER_ONTONOTES);
else
this.nerAnnotator = new NERAnnotator(this.resourceManager, "NER_OTHER");
} else {
this.nerAnnotator = new NERAnnotator(this.resourceManager, viewName);
}
System.out.println("Completed loading resources, assuming a ");
}

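A hedged example of the new configuration hook (not in the diff): if the view-name key is present in the resource manager, Main builds the annotator for that view directly; otherwise it falls back to the model-name branch. The properties path and the "NER_CUSTOM" label below are placeholders.

// Hypothetical usage; mirrors the null check in the hunk above.
ResourceManager rm = new ResourceManager("config/my-ner.properties"); // placeholder path
String viewName = rm.getString(NerBaseConfigurator.VIEW_NAME);        // e.g. "NER_CUSTOM"
NERAnnotator annotator = (viewName != null)
        ? new NERAnnotator(rm, viewName)
        : new NERAnnotator(rm, ViewNames.NER_CONLL);                   // fallback as in inputMenu()
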
15 changes: 14 additions & 1 deletion ner/src/main/java/edu/illinois/cs/cogcomp/ner/ModelLoader.java
@@ -37,9 +37,10 @@ public class ModelLoader {
* Load the models wherever they are found. Check file system first, then classpath, and finally get it
* from Minio datastore.
* @param rm the resource manager.
* @param training if we are training.
* @param viewName the name of the view identifies the model.
*/
static public void load(ResourceManager rm, String viewName) {
static public void load(ResourceManager rm, String viewName, boolean training) {

// the loader built into the model will check the local file system and the jar files in the classpath.
ParametersForLbjCode cp = ParametersForLbjCode.currentParameters;
@@ -65,6 +66,18 @@ static public void load(ResourceManager rm, String viewName) {
} else {
logger.info("L2 model not required.");
}
} else if (training) {

// we are training a new model, so if it doesn't exist, we don't care; just create a
// container.
tagger1 = new NETaggerLevel1(modelPath + ".level1", modelPath + ".level1.lex");
logger.info("Reading L1 model from file : " + modelPath + ".level2");
if (cp.featuresToUse.containsKey("PredictionsLevel1")) {
tagger2 = new NETaggerLevel2(modelPath + ".level2", modelPath + ".level2.lex");
logger.info("Reading L2 model from file : " + modelPath + ".level2");
} else {
logger.info("L2 model not required.");
}
} else {

// all else has failed, load from the datastore, create artifact ids based on the view
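For context, a minimal sketch (not part of the diff) of the two ways the new training flag is meant to be used. The config path is a placeholder and the "modelName" key follows the NerBenchmark calls later in this diff.

// Sketch only, under the assumptions above.
ResourceManager rm = new ResourceManager("config/conll.config");
Parameters.readConfigAndLoadExternalData(rm);

// Inference: the model must be found on disk, on the classpath, or in the datastore.
ModelLoader.load(rm, rm.getString("modelName"), false);

// Training: if no model exists yet, empty tagger containers are created to train into.
ModelLoader.load(rm, rm.getString("modelName"), true);
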
23 changes: 18 additions & 5 deletions ner/src/main/java/edu/illinois/cs/cogcomp/ner/NERAnnotator.java
@@ -25,6 +25,7 @@
import edu.illinois.cs.cogcomp.core.utilities.configuration.Configurator;
import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager;
import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon;
import edu.illinois.cs.cogcomp.lbjava.learn.SparseNetworkLearner;
import edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector;
import edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.ExpressiveFeaturesAnnotator;
import edu.illinois.cs.cogcomp.ner.InferenceMethods.Decoder;
@@ -47,6 +48,12 @@ public class NERAnnotator extends Annotator {
/** our specific logger. */
private final Logger logger = LoggerFactory.getLogger(NERAnnotator.class);

/** the level one model. */
public SparseNetworkLearner taggerLevel1;

/** the level two model. */
public SparseNetworkLearner taggerLevel2;

/**
* @param nonDefaultConfigValues a configuration file specifying non-default parameters for the
* NER model to use
@@ -82,7 +89,9 @@ public NERAnnotator(ResourceManager nonDefaultRm, String viewName) {
AnnotatorConfigurator.IS_LAZILY_INITIALIZED.key, Configurator.TRUE), nonDefaultRm);
}


/** this is used to sync loading models. */
static final String LOADING_MODELS = "LOADING_MODELS";

/**
* Superclass calls this method either on instantiation or at first call to getView(). Logging
* has been disabled because non-static logger is not initialized at the time this is called if
@@ -102,7 +111,11 @@ public void initialize(ResourceManager nerRm) {
Parameters.readConfigAndLoadExternalData(nerRm);

// load the models.
ModelLoader.load(nerRm, viewName);
synchronized (LOADING_MODELS) {
ModelLoader.load(nerRm, viewName, false);
this.taggerLevel1 = ParametersForLbjCode.currentParameters.taggerLevel1;
this.taggerLevel2 = ParametersForLbjCode.currentParameters.taggerLevel2;
}
}

/**
@@ -139,8 +152,8 @@ public void addView(TextAnnotation ta) {
Data data = new Data(new NERDocument(sentences, "input"));
try {
ExpressiveFeaturesAnnotator.annotate(data);
Decoder.annotateDataBIO(data, (NETaggerLevel1) ParametersForLbjCode.currentParameters.taggerLevel1,
(NETaggerLevel2) ParametersForLbjCode.currentParameters.taggerLevel2);
Decoder.annotateDataBIO(data, (NETaggerLevel1) taggerLevel1,
(NETaggerLevel2) taggerLevel2);
} catch (Exception e) {
logger.error("Cannot annotate the text, the exception was: ", e);
return;
@@ -228,7 +241,7 @@ public Set<String> getTagValues() {
if (!isInitialized()) {
doInitialize();
}
Lexicon labelLexicon = ParametersForLbjCode.currentParameters.taggerLevel1.getLabelLexicon();
Lexicon labelLexicon = taggerLevel1.getLabelLexicon();
Set<String> tagSet = new HashSet<String>();
for (int i =0; i < labelLexicon.size(); ++i) {
tagSet.add(labelLexicon.lookupKey(i).getStringValue());
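The per-instance taggerLevel1/taggerLevel2 fields plus the synchronized model loading are what make it safe to host more than one NERAnnotator in a single process, which is the bug this PR addresses. A sketch of that scenario, with the configuration files and the TextAnnotation construction left as explicit assumptions:

// Sketch only, not part of this PR. Config paths are placeholders; each file is assumed
// to point at the matching model.
import edu.illinois.cs.cogcomp.core.datastructures.ViewNames;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation;
import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager;
import edu.illinois.cs.cogcomp.ner.NERAnnotator;

public class TwoAnnotatorsSketch {
    public static void main(String[] args) throws Exception {
        ResourceManager rmConll = new ResourceManager("config/conll.config");
        ResourceManager rmOntonotes = new ResourceManager("config/ontonotes.config");

        NERAnnotator conll = new NERAnnotator(rmConll, ViewNames.NER_CONLL);
        NERAnnotator ontonotes = new NERAnnotator(rmOntonotes, ViewNames.NER_ONTONOTES);

        // ta is assumed to be a sentence- and token-split TextAnnotation built elsewhere.
        TextAnnotation ta = buildTextAnnotation();
        conll.addView(ta);      // decodes with this instance's own taggers
        ontonotes.addView(ta);  // no longer clobbers the CoNLL instance's models
    }

    private static TextAnnotation buildTextAnnotation() {
        throw new UnsupportedOperationException("placeholder: build a TextAnnotation here");
    }
}
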
37 changes: 31 additions & 6 deletions ner/src/main/java/edu/illinois/cs/cogcomp/ner/NerBenchmark.java
@@ -8,8 +8,10 @@
package edu.illinois.cs.cogcomp.ner;

import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager;
import edu.illinois.cs.cogcomp.lbjava.learn.SparseAveragedPerceptron;
import edu.illinois.cs.cogcomp.ner.LbjFeatures.NETaggerLevel1;
import edu.illinois.cs.cogcomp.ner.LbjFeatures.NETaggerLevel2;
import edu.illinois.cs.cogcomp.ner.LbjTagger.*;
import edu.illinois.cs.cogcomp.ner.config.NerBaseConfigurator;

import java.io.File;
import java.io.FilenameFilter;
@@ -44,6 +46,7 @@
* -features : for debugging, reports the feature vector for each token in the dataset. Output produced in a "features.out" file.
* -iterations : specify a fixed number of iterations, or -1 (the default) means auto converge requiring a "dev" directory.
 * -release : build a final model for release, it will build on test and train, and unless "-iterations" is specified, it will autoconverge
* -incremental : rather than discarding existing weights, start with those that already exist and continue training.
* using "dev" for a holdout set.
* }
*/
@@ -70,6 +73,10 @@ public class NerBenchmark {
/** Report the input features for each level */
static boolean verbose = false;

/** If this is set, we will start with the existing weights (and averages) for the
* model and continue training from there. */
static boolean incremental = false;

/** the output file name. */
static String output = null;

@@ -94,6 +101,10 @@ private static void parseArguments(String[] args) {
}
directory = args[i];
break;
case "-incremental":
System.out.println("Configured for incremental training.");
incremental = true;
break;
case "-verbose":
verbose = true;
break;
@@ -184,11 +195,25 @@ public boolean accept(File dir, String name) {
System.out.println("\n\n----- Training models for evaluation for "+confFile+" ------");
Parameters.readConfigAndLoadExternalData(confFile, !skiptraining);
ResourceManager rm = new ResourceManager(confFile);
ModelLoader.load(rm, rm.getString("modelName"));
ModelLoader.load(rm, rm.getString("modelName"), !skiptraining);

// report learning rates and thicknesses
NETaggerLevel1 taggerLevel1 = (NETaggerLevel1) ParametersForLbjCode.currentParameters.taggerLevel1;
NETaggerLevel2 taggerLevel2 = (NETaggerLevel2) ParametersForLbjCode.currentParameters.taggerLevel2;
SparseAveragedPerceptron sap1 = (SparseAveragedPerceptron)taggerLevel1.getBaseLTU();
sap1.setLearningRate(ParametersForLbjCode.currentParameters.learningRatePredictionsLevel1);
sap1.setThickness(ParametersForLbjCode.currentParameters.thicknessPredictionsLevel1);
System.out.println("L1 learning rate = "+sap1.getLearningRate()+", thickness = "+sap1.getPositiveThickness());
if (ParametersForLbjCode.currentParameters.featuresToUse.containsKey("PredictionsLevel1")) {
SparseAveragedPerceptron sap2 = (SparseAveragedPerceptron)taggerLevel2.getBaseLTU();
sap2.setLearningRate(ParametersForLbjCode.currentParameters.learningRatePredictionsLevel2);
sap2.setThickness(ParametersForLbjCode.currentParameters.thicknessPredictionsLevel2);
System.out.println("L2 learning rate = "+sap2.getLearningRate()+", thickness = "+sap2.getPositiveThickness());
}

// there is a training directory, with training enabled, so train. We use the same dataset
// for both training and evaluating.
LearningCurveMultiDataset.getLearningCurve(iterations, trainDirName, devDirName);
LearningCurveMultiDataset.getLearningCurve(iterations, trainDirName, devDirName, incremental);
System.out.println("\n\n----- Final results for "+confFile+", verbose ------");
NETesterMultiDataset.test(testDirName, true,
ParametersForLbjCode.currentParameters.labelsToIgnoreInEvaluation,
@@ -205,7 +230,7 @@ public boolean accept(File dir, String name) {
System.out.println("\n\n----- Reporting results from existing models for "+confFile+" ------");
Parameters.readConfigAndLoadExternalData(confFile, !skiptraining);
ResourceManager rm = new ResourceManager(confFile);
ModelLoader.load(rm, rm.getString("modelName"));
ModelLoader.load(rm, rm.getString("modelName"), !skiptraining);
System.out.println("Benchmark against configuration : " + confFile);
if (reportLabels)
NEDisplayPredictions.test(testDirName, "-c", verbose);
@@ -221,12 +246,12 @@ else if (reportFeatures)
if (trainDir.exists() && testDir.exists() && devDir.exists()) {
Parameters.readConfigAndLoadExternalData(confFile, !skiptraining);
ResourceManager rm = new ResourceManager(confFile);
ModelLoader.load(rm, rm.getString("modelName"));
ModelLoader.load(rm, rm.getString("modelName"), true);
System.out.println("\n\n----- Building a final model for "+confFile+" ------");

// there is a training directory, with training enabled, so train. We use the same dataset
// for both training and evaluating.
LearningCurveMultiDataset.buildFinalModel(iterations, trainDirName, testDirName, devDirName);
LearningCurveMultiDataset.buildFinalModel(iterations, trainDirName, testDirName, devDirName, incremental);
System.out.println("\n\n----- Release results for "+confFile+", verbose ------");
NETesterMultiDataset.test(devDirName, true,
ParametersForLbjCode.currentParameters.labelsToIgnoreInEvaluation,
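A hedged invocation example for the new -incremental option. NerBenchmark's entry point and the flag that names the benchmark root directory are not shown in this diff, so both appear below as assumptions.

// Hypothetical: "-directory" stands in for the real benchmark-root flag, which is
// truncated in this diff; the other flag follows the usage block documented above.
String[] benchmarkArgs = { "-directory", "benchmark", "-incremental" };
NerBenchmark.main(benchmarkArgs);   // assumes the standard main(String[]) entry point
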
6 changes: 3 additions & 3 deletions ner/src/main/java/edu/illinois/cs/cogcomp/ner/NerTagger.java
@@ -38,13 +38,13 @@ public static void main(String[] args) {
Parameters.readConfigAndLoadExternalData(args[args.length - 1], areWeTraining);

if (args[0].equalsIgnoreCase("-train"))
LearningCurveMultiDataset.getLearningCurve(-1, args[1], args[2]);
LearningCurveMultiDataset.getLearningCurve(-1, args[1], args[2], false);
else if (args[0].equalsIgnoreCase("-trainFixedIterations"))
LearningCurveMultiDataset.getLearningCurve(Integer.parseInt(args[1]), args[2],
args[3]);
args[3], false);
else {
// load up the models
ModelLoader.load(rm, rm.getString("modelName"));
ModelLoader.load(rm, rm.getString("modelName"), false);
if (args[0].equalsIgnoreCase("-annotate")) {
NETagPlain.init();
NETagPlain.tagData(args[1], args[2]);