From f958908130d88083095f32a1646372565a3824f0 Mon Sep 17 00:00:00 2001 From: Matthew Taylor Date: Mon, 8 May 2017 09:26:50 -0700 Subject: [PATCH 01/16] Working on OPF model RST docstrings --- docs/source/api/opf/models.rst | 10 ++++++---- docs/source/api/opf/utils.rst | 4 ++++ src/nupic/frameworks/opf/model.py | 10 +++++----- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/docs/source/api/opf/models.rst b/docs/source/api/opf/models.rst index b884dbf466..700834d83f 100644 --- a/docs/source/api/opf/models.rst +++ b/docs/source/api/opf/models.rst @@ -1,20 +1,22 @@ Models ====== -Model -^^^^^ +Base Model +^^^^^^^^^^ + +.. automodule:: nupic.frameworks.opf.model .. autoclass:: nupic.frameworks.opf.model.Model :members: HTMPredictionModel -^^^^^^^^ +^^^^^^^^^^^^^^^^^^ .. autoclass:: nupic.frameworks.opf.htm_prediction_model.HTMPredictionModel :members: :show-inheritance: -======= + TwoGramModel ^^^^^^^^^^^^ diff --git a/docs/source/api/opf/utils.rst b/docs/source/api/opf/utils.rst index 83390f7239..2e77e7fcf0 100644 --- a/docs/source/api/opf/utils.rst +++ b/docs/source/api/opf/utils.rst @@ -12,3 +12,7 @@ Helpers .. automodule:: nupic.frameworks.opf.opf_helpers :members: + +.. autoclass:: nupic.frameworks.opf.opf_utils.InferenceType + :members: + :show-inheritance: diff --git a/src/nupic/frameworks/opf/model.py b/src/nupic/frameworks/opf/model.py index bf6253baaa..4261ebe2bf 100644 --- a/src/nupic/frameworks/opf/model.py +++ b/src/nupic/frameworks/opf/model.py @@ -33,18 +33,18 @@ class Model(object): """ This is the base class that all OPF Model implementations should subclass. + It includes a number of virtual methods, to be overridden by subclasses, as well as some shared functionality for saving/loading models + + :param inferenceType: (:class:`~nupic.frameworks.opf.opf_utils.InferenceType`) + A value that specifies the type of inference (i.e. TemporalNextStep, + Classification, etc.). """ __metaclass__ = ABCMeta def __init__(self, inferenceType): - """ Model constructor. - @param inferenceType (nupic.frameworks.opf.opf_utils.InferenceType) - A value that specifies the type of inference (i.e. TemporalNextStep, - Classification, etc.). - """ self._numPredictions = 0 self.__inferenceType = inferenceType self.__learningEnabled = True From bb5a321fe81ca5707ae38e87ced6102ac5e25a6b Mon Sep 17 00:00:00 2001 From: Matthew Taylor Date: Mon, 8 May 2017 09:30:39 -0700 Subject: [PATCH 02/16] Updated snakecase opf_utils in RST docs --- docs/source/api/opf/results.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/api/opf/results.rst b/docs/source/api/opf/results.rst index 05c2a9a055..e68ce2b3e5 100644 --- a/docs/source/api/opf/results.rst +++ b/docs/source/api/opf/results.rst @@ -4,13 +4,13 @@ Model Results ModelResult ^^^^^^^^^^^ -.. autoclass:: nupic.frameworks.opf.opfutils.ModelResult +.. autoclass:: nupic.frameworks.opf.opf_utils.ModelResult :members: SensorInput ^^^^^^^^^^^ -.. autoclass:: nupic.frameworks.opf.opfutils.SensorInput +.. autoclass:: nupic.frameworks.opf.opf_utils.SensorInput :members: InferenceShifter @@ -22,5 +22,5 @@ InferenceShifter InferenceType ^^^^^^^^^^^^^ -.. autoclass:: nupic.frameworks.opf.opfutils.InferenceType +.. 
autoclass:: nupic.frameworks.opf.opf_utils.InferenceType :members: From b3366ca0f57132cc31dd8f642d08eab209f27c85 Mon Sep 17 00:00:00 2001 From: Matthew Taylor Date: Mon, 8 May 2017 10:28:56 -0700 Subject: [PATCH 03/16] Added RecordStreamIFace to RST cause OFP models need it --- docs/source/api/data/index.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/source/api/data/index.rst b/docs/source/api/data/index.rst index 9977e2b182..f0f57ba47f 100644 --- a/docs/source/api/data/index.rst +++ b/docs/source/api/data/index.rst @@ -12,3 +12,11 @@ FileRecordStream .. autoclass:: nupic.data.file_record_stream.FileRecordStream :members: + +RecordStream +^^^^^^^^^^^^ + +.. automodule:: nupic.data.record_stream + +.. autoclass:: nupic.data.record_stream.RecordStreamIface + :members: From 19fdf1cbf270f8f883e55b5d9718e4caff41e316 Mon Sep 17 00:00:00 2001 From: Matthew Taylor Date: Mon, 8 May 2017 10:46:43 -0700 Subject: [PATCH 04/16] Fully docced OPF Model --- src/nupic/frameworks/opf/model.py | 121 +++++++++++++++++------------- 1 file changed, 70 insertions(+), 51 deletions(-) diff --git a/src/nupic/frameworks/opf/model.py b/src/nupic/frameworks/opf/model.py index 4261ebe2bf..16d2fdf5ea 100644 --- a/src/nupic/frameworks/opf/model.py +++ b/src/nupic/frameworks/opf/model.py @@ -38,8 +38,7 @@ class Model(object): as well as some shared functionality for saving/loading models :param inferenceType: (:class:`~nupic.frameworks.opf.opf_utils.InferenceType`) - A value that specifies the type of inference (i.e. TemporalNextStep, - Classification, etc.). + A value that specifies the type of inference. """ __metaclass__ = ABCMeta @@ -52,16 +51,18 @@ def __init__(self, inferenceType): self.__inferenceArgs = {} def run(self, inputRecord): - """ Run one iteration of this model. - @param inputRecord (object) + """ + Run one iteration of this model. + + :param inputRecord: (object) A record object formatted according to - nupic.data.record_stream.RecordStreamIface.getNextRecord() or - nupic.data.record_stream.RecordStreamIface.getNextRecordDict() + :meth:`~nupic.data.record_stream.RecordStreamIface.getNextRecord` or + :meth:`~nupic.data.record_stream.RecordStreamIface.getNextRecordDict` result format. - @returns (nupic.frameworks.opf.opf_utils.ModelResult) + :returns: (:class:`~nupic.frameworks.opf.opf_utils.ModelResult`) An ModelResult namedtuple. The contents of ModelResult.inferences depends on the the specific inference type of this model, which - can be queried by getInferenceType() + can be queried by :meth:`.getInferenceType`. """ if hasattr(self, '_numPredictions'): predictionNumber = self._numPredictions @@ -78,9 +79,9 @@ def finishLearning(self): In such a mode the model will not be able to learn from subsequent input records. - **NOTE:** Upon completion of this command, learning may not be resumed on - the given instance of the model (e.g., the implementation may optimize - itself by pruning data structures that are necessary for learning). + .. note:: Upon completion of this command, learning may not be resumed on + the given instance of the model (e.g., the implementation may optimize + itself by pruning data structures that are necessary for learning). """ @abstractmethod @@ -89,32 +90,37 @@ def resetSequenceStates(self): @abstractmethod def getFieldInfo(self, includeClassifierOnlyField=False): - """ Return the sequence of FieldMetaInfo objects specifying the format of - Model's output. 
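# --- Illustrative sketch (not part of the patch) ---------------------------
# A minimal run loop for the Model.run() contract documented above. Assumes a
# constructed `model` and a CSV readable by FileRecordStream; "data.csv" is an
# example path, and getNextRecordDict() returning None at end-of-stream is an
# assumption based on the getNextRecord() docs.
from nupic.data.file_record_stream import FileRecordStream

def runAllRecords(model, csvPath="data.csv"):
  """Feed every record in csvPath to model.run(), collecting ModelResults."""
  stream = FileRecordStream(csvPath)
  results = []
  record = stream.getNextRecordDict()
  while record is not None:  # None signals End Of Stream
    # ModelResult.inferences depends on the model's inference type, which
    # can be queried with model.getInferenceType().
    results.append(model.run(record))
    record = stream.getNextRecordDict()
  stream.close()
  return results
# ----------------------------------------------------------------------------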
- This may be different than the list of FieldMetaInfo objects supplied at + """ + Return the sequence of :class:`~nupic.data.field_meta.FieldMetaInfo` objects + specifying the format of Model's output. + + This may be different than the list of + :class:`~nupic.data.field_meta.FieldMetaInfo` objects supplied at initialization (e.g., due to the transcoding of some input fields into meta-fields, such as datetime -> dayOfWeek, timeOfDay, etc.). - @param includeClassifierOnlyField (bool) + + :param includeClassifierOnlyField: (bool) If True, any field which is only sent to the classifier (i.e. not sent in to the bottom of the network) is also included - @returns (list) - List of FieldMetaInfo objects. + :returns: (list) of :class:`~nupic.data.field_meta.FieldMetaInfo` objects. """ @abstractmethod - def setFieldStatistics(self,fieldStats): + def setFieldStatistics(self, fieldStats): """ Propagate field statistics to the model in case some of its machinery needs it. - @param fieldStats (dict) + + :param fieldStats: (dict) A dict of dicts with first key being the fieldname and the second - key is min,max or other supported statistics + key is min,max or other supported statistics. """ @abstractmethod def getRuntimeStats(self): - """ Get runtime statistics specific to this model, - i.e. activeCellOverlapAvg. - @returns (dict) A {statistic names: stats} dictionary + """ Get runtime statistics specific to this model, i.e. + ``activeCellOverlapAvg``. + + :returns: (dict) A {statistic names: stats} dictionary """ @abstractmethod @@ -122,7 +128,8 @@ def _getLogger(self): """ Get the logger for this object. This is a protected method that is used by the ModelBase to access the logger created by the subclass. - @returns (Logger) A Logger object, it should not be None. + + :returns: (Logger) A Logger object, it should not be None. """ ############################################################################### @@ -132,7 +139,8 @@ def _getLogger(self): def getInferenceType(self): """ Return the InferenceType of this model. This is immutable. - @returns (nupic.frameworks.opf.opf_utils.InferenceType) An inference type + + :returns: :class:`~nupic.frameworks.opf.opf_utils.InferenceType` """ return self.__inferenceType @@ -148,13 +156,15 @@ def disableLearning(self): def isLearningEnabled(self): """ Return the Learning state of the current model. - @returns (bool) The learning state + + :returns: (bool) The learning state """ return self.__learningEnabled def enableInference(self, inferenceArgs=None): """ Enable inference for this model. - @param inferenceArgs (dict) + + :param inferenceArgs: (dict) A dictionary of arguments required for inference. These depend on the InferenceType of the current model """ @@ -163,7 +173,8 @@ def enableInference(self, inferenceArgs=None): def getInferenceArgs(self): """ Return the dict of arguments for the current inference mode. - @returns (dict) The arguments of the inference mode + + :returns: (dict) The arguments of the inference mode """ return self.__inferenceArgs @@ -173,7 +184,8 @@ def disableInference(self): def isInferenceEnabled(self): """ Return the inference state of the current model. - @returns (bool) The inference state + + :returns: (bool) The inference state """ return self.__inferenceEnabled @@ -189,16 +201,17 @@ def getProtoType(): @staticmethod def _getModelCheckpointFilePath(checkpointDir): """ Return the absolute path of the model's checkpoint file. 
-    @param checkpointDir (string)
+
+    :param checkpointDir: (string)
             Directory of where the experiment is to be or was saved
-    @returns (string) An absolute path.
+    :returns: (string) An absolute path.
     """
     path = os.path.join(checkpointDir, "model.data")
     path = os.path.abspath(path)
     return path

   def writeToCheckpoint(self, checkpointDir):
-    """Serializes model using capnproto and writes data to ``checkpointDir``"""
+    """Serializes model using capnproto and writes data to ``checkpointDir``."""
     proto = self.getProtoType().new_message()

     self.write(proto)

 @classmethod
 def readFromCheckpoint(cls, checkpointDir):

   def write(self, proto):
     """Write state to proto object.

-    The type of proto is determined by getProtoType().
+    The type of proto is determined by :meth:`getProtoType`.
     """
     raise NotImplementedError()

   @classmethod
   def read(cls, proto):
     """Read state from proto object.

-    The type of proto is determined by getProtoType().
+    The type of proto is determined by :meth:`getProtoType`.
     """
     raise NotImplementedError()

   def save(self, saveModelDir):
     """ Save the model in the given directory.
-    @param saveModelDir (string)
-      Absolute directory path for saving the model. This directory should
-      only be used to store a saved model. If the directory does not exist,
-      it will be created automatically and populated with model data. A
-      pre-existing directory will only be accepted if it contains previously
-      saved model data. If such a directory is given, the full contents of
-      the directory will be deleted and replaced with current model data.
+
+    :param saveModelDir: (string)
+      Absolute directory path for saving the model. This directory should
+      only be used to store a saved model. If the directory does not exist,
+      it will be created automatically and populated with model data. A
+      pre-existing directory will only be accepted if it contains previously
+      saved model data. If such a directory is given, the full contents of
+      the directory will be deleted and replaced with current model data.
     """
     logger = self._getLogger()
     logger.debug("(%s) Creating local checkpoint in %r...",

   def _serializeExtraData(self, extraDataDir):
     directory path. It can be overridden by subclasses to bypass pickle for
     saving large binary states. This is called by ModelBase only.

-    @param extraDataDir (string) Model's extra data directory path
+    :param extraDataDir: (string) Model's extra data directory path
     """
     pass

   @classmethod
   def load(cls, savedModelDir):
     """ Load saved model.
-    @param savedModelDir (string)
+
+    :param savedModelDir: (string)
       Directory of where the experiment is to be or was saved
-    @returns (Model) The loaded model instance
+    :returns: (:class:`Model`) The loaded model instance
     """
     logger = opf_utils.initLogger(cls)
     logger.debug("Loading model from local checkpoint at %r...", savedModelDir)

   def _deSerializeExtraData(self, extraDataDir):
     (after __setstate__) with an external directory path. It can be overridden
     by subclasses to bypass pickle for loading large binary states.

-    This is called by ModelBase only
-    @param extraDataDir (string) Model's extra data directory path
+    This is called by ModelBase only.
+
+    :param extraDataDir: (string) Model's extra data directory path
     """
     pass

   @staticmethod
   def _getModelPickleFilePath(saveModelDir):
     """ Return the absolute path of the model's pickle file.
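# --- Illustrative sketch (not part of the patch) ---------------------------
# The capnproto checkpoint round trip described above. Assumes `model` is a
# Model subclass that implements getProtoType()/write()/read(); the temp
# directory is an example location for the generated model.data file.
import tempfile

def checkpointRoundTrip(model):
  checkpointDir = tempfile.mkdtemp()      # e.g. /tmp/tmpXXXX/
  model.writeToCheckpoint(checkpointDir)  # serializes via write(proto)
  return type(model).readFromCheckpoint(checkpointDir)
# ----------------------------------------------------------------------------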
- @param saveModelDir (string) + + :param saveModelDir: (string) Directory of where the experiment is to be or was saved - @returns (string) An absolute path. + :returns: (string) An absolute path. """ path = os.path.join(saveModelDir, "model.pkl") path = os.path.abspath(path) @@ -365,9 +382,10 @@ def _getModelPickleFilePath(saveModelDir): def _getModelExtraDataDir(saveModelDir): """ Return the absolute path to the directory where the model's own "extra data" are stored (i.e., data that's too big for pickling). - @param saveModelDir (string) + + :param saveModelDir: (string) Directory of where the experiment is to be or was saved - @returns (string) An absolute path. + :returns: (string) An absolute path. """ path = os.path.join(saveModelDir, "modelextradata") path = os.path.abspath(path) @@ -378,7 +396,8 @@ def _getModelExtraDataDir(saveModelDir): def __makeDirectoryFromAbsolutePath(absDirPath): """ Make directory for the given directory path if it doesn't already exist in the filesystem. - @param absDirPath (string) Absolute path of the directory to create + + :param absDirPath: (string) Absolute path of the directory to create @exception (Exception) OSError if directory creation fails """ From 949dc685139de03b817aaa2494a159156284e022 Mon Sep 17 00:00:00 2001 From: Matthew Taylor Date: Mon, 8 May 2017 12:26:51 -0700 Subject: [PATCH 05/16] Fully docced HTMPredictionModel --- docs/source/api/opf/models.rst | 10 +- .../frameworks/opf/htm_prediction_model.py | 143 +++++++++--------- src/nupic/frameworks/opf/model.py | 5 +- 3 files changed, 85 insertions(+), 73 deletions(-) diff --git a/docs/source/api/opf/models.rst b/docs/source/api/opf/models.rst index 700834d83f..d9c94a993b 100644 --- a/docs/source/api/opf/models.rst +++ b/docs/source/api/opf/models.rst @@ -13,10 +13,18 @@ Base Model HTMPredictionModel ^^^^^^^^^^^^^^^^^^ +.. automodule:: nupic.frameworks.opf.htm_prediction_model + .. autoclass:: nupic.frameworks.opf.htm_prediction_model.HTMPredictionModel - :members: + :members: getParameter,getRuntimeStats :show-inheritance: + .. automethod:: nupic.frameworks.opf.htm_prediction_model.HTMPredictionModel.setAnomalyParameter(param, value) + .. automethod:: nupic.frameworks.opf.htm_prediction_model.HTMPredictionModel.getAnomalyParameter(param) + .. automethod:: nupic.frameworks.opf.htm_prediction_model.HTMPredictionModel.anomalyRemoveLabels(start, end, labelFilter) + .. automethod:: nupic.frameworks.opf.htm_prediction_model.HTMPredictionModel.anomalyAddLabel(start, end, labelName) + .. automethod:: nupic.frameworks.opf.htm_prediction_model.HTMPredictionModel.anomalyGetLabels(start, end) + TwoGramModel ^^^^^^^^^^^^ diff --git a/src/nupic/frameworks/opf/htm_prediction_model.py b/src/nupic/frameworks/opf/htm_prediction_model.py index 935797715b..bdcfc2a530 100644 --- a/src/nupic/frameworks/opf/htm_prediction_model.py +++ b/src/nupic/frameworks/opf/htm_prediction_model.py @@ -19,10 +19,9 @@ # http://numenta.org/licenses/ # ---------------------------------------------------------------------- -""" @file htm_prediction_model.py - -Encapsulation of CLAnetwork that implements the ModelBase. - +""" +Encapsulation of HTM network that implements the base :class:`Model` to perform +temporal prediction. 
""" import copy @@ -34,6 +33,7 @@ import traceback from collections import deque from operator import itemgetter +from functools import wraps import numpy @@ -54,7 +54,8 @@ except ImportError: capnp = None if capnp: - from nupic.frameworks.opf.HTMPredictionModelProto_capnp import HTMPredictionModelProto + from nupic.frameworks.opf.HTMPredictionModelProto_capnp \ + import HTMPredictionModelProto DEFAULT_LIKELIHOOD_THRESHOLD = 0.0001 @@ -69,6 +70,7 @@ def requireAnomalyModel(func): """ Decorator for functions that require anomaly models. """ + @wraps(func) def _decorator(self, *args, **kwargs): if not self.getInferenceType() == InferenceType.TemporalAnomaly: raise RuntimeError("Method required a TemporalAnomaly model.") @@ -102,6 +104,44 @@ def __repr__(self): class HTMPredictionModel(Model): + """ + + This model is for temporal predictions multiple steps ahead. + + :param inferenceType: (:class:`~nupic.frameworks.opf.opf_utils.InferenceType`) + + :param predictedField: (string) The field to predict for multistep prediction. + + :param sensorParams: (dict) specifying the sensor parameters. + + :param spEnable: (bool) Whether or not to use a spatial pooler. + + :param spParams: (dict) specifying the spatial pooler parameters. These are + passed to the spatial pooler. + + :param trainSPNetOnlyIfRequested: (bool) If set, don't create an SP network + unless the user requests SP metrics. + + :param tmEnable: (bool) Whether to use a temporal memory. + + :param tmParams: (dict) specifying the temporal memory parameters. These are + passed to the temporal memory. + + :param clEnable: (bool) Whether to use the classifier. If false, the + classifier will not be created and no predictions will be generated. + + :param clParams: (dict) specifying the classifier parameters. These are passed + to the classifier. + + :param anomalyParams: (dict) Anomaly detection parameters + + :param minLikelihoodThreshold: (float) The minimum likelihood value to include + in inferences. Currently only applies to multistep inferences. + + :param maxPredictionsPerStep: (int) Maximum number of predictions to include + for each step in inferences. The predictions with highest likelihood are + included. + """ __supportedInferenceKindSet = set((InferenceType.TemporalNextStep, InferenceType.TemporalClassification, @@ -131,31 +171,6 @@ def __init__(self, minLikelihoodThreshold=DEFAULT_LIKELIHOOD_THRESHOLD, maxPredictionsPerStep=DEFAULT_MAX_PREDICTIONS_PER_STEP, network=None): - """HTMPredictionModel constructor. - - Args: - inferenceType: A value from the InferenceType enum class. - predictedField: The field to predict for multistep prediction. - sensorParams: A dictionary specifying the sensor parameters. - spEnable: Whether or not to use a spatial pooler. - spParams: A dictionary specifying the spatial pooler parameters. These - are passed to the spatial pooler. - trainSPNetOnlyIfRequested: If set, don't create an SP network unless the - user requests SP metrics. - tmEnable: Whether to use a temporal memory. - tmParams: A dictionary specifying the temporal memory parameters. These - are passed to the temporal memory. - clEnable: Whether to use the classifier. If false, the classifier will - not be created and no predictions will be generated. - clParams: A dictionary specifying the classifier parameters. These are - are passed to the classifier. - anomalyParams: Anomaly detection parameters - minLikelihoodThreshold: The minimum likelihood value to include in - inferences. Currently only applies to multistep inferences. 
- maxPredictionsPerStep: Maximum number of predictions to include for - each step in inferences. The predictions with highest likelihood are - included. - """ if not inferenceType in self.__supportedInferenceKindSet: raise ValueError("{0} received incompatible inference type: {1}"\ .format(self.__class__, inferenceType)) @@ -233,6 +248,13 @@ def __init__(self, def getParameter(self, paramName): + """ + Currently only supports a parameter named ``__numRunCalls``. + + :param paramName: (string) name of parameter to get. If not + ``__numRunCalls`` an exception is thrown. + :returns: (int) the value of ``self.__numRunCalls`` + """ if paramName == '__numRunCalls': return self.__numRunCalls else: @@ -241,10 +263,6 @@ def getParameter(self, paramName): def resetSequenceStates(self): - """ [virtual method override] Resets the model's sequence states. Normally - called to force the delineation of a sequence, such as between OPF tasks. - """ - if self._hasTP: # Reset TM's sequence states self._getTPRegion().executeCommand(['resetSequenceStates']) @@ -256,14 +274,6 @@ def resetSequenceStates(self): def finishLearning(self): - """ [virtual method override] Places the model in a permanent "finished - learning" mode where it will not be able to learn from subsequent input - records. - - NOTE: Upon completion of this command, learning may not be resumed on - the given instance of the model (e.g., the implementation may optimize - itself by pruning data structures that are necessary for learning) - """ assert not self.__finishedLearning if self._hasSP: @@ -283,7 +293,7 @@ def finishLearning(self): return - def setFieldStatistics(self,fieldStats): + def setFieldStatistics(self, fieldStats): encoder = self._getEncoder() # Set the stats for the encoders. The first argument to setFieldStats # is the field name of the encoder. Since we are using a multiencoder @@ -293,13 +303,11 @@ def setFieldStatistics(self,fieldStats): def enableLearning(self): - """[override] Turn Learning on for the current model """ super(HTMPredictionModel, self).enableLearning() self.setEncoderLearning(True) def disableLearning(self): - """[override] Turn Learning off for the current model """ super(HTMPredictionModel, self).disableLearning() self.setEncoderLearning(False) @@ -313,6 +321,9 @@ def setEncoderLearning(self,learningEnabled): def setAnomalyParameter(self, param, value): """ Set a parameter of the anomaly classifier within this model. + + :param param: (string) name of parameter to set + :param value: (object) value to set """ self._getAnomalyClassifier().setParameter(param, value) @@ -320,7 +331,9 @@ def setAnomalyParameter(self, param, value): @requireAnomalyModel def getAnomalyParameter(self, param): """ - Get a parameter of the anomaly classifier within this model. + Get a parameter of the anomaly classifier within this model by key. + + :param param: (string) name of parameter to retrieve """ return self._getAnomalyClassifier().getParameter(param) @@ -329,6 +342,9 @@ def getAnomalyParameter(self, param): def anomalyRemoveLabels(self, start, end, labelFilter): """ Remove labels from the anomaly classifier within this model. + + :param start: (int) index to start removing labels + :param end: (int) index to end removing labels """ self._getAnomalyClassifier().getSelf().removeLabels(start, end, labelFilter) @@ -337,6 +353,10 @@ def anomalyRemoveLabels(self, start, end, labelFilter): def anomalyAddLabel(self, start, end, labelName): """ Add labels from the anomaly classifier within this model. 
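# --- Illustrative sketch (not part of the patch) ---------------------------
# The anomaly-label methods below are guarded by @requireAnomalyModel, so
# `model` is assumed to be an HTMPredictionModel created with
# InferenceType.TemporalAnomaly. The indices and label name are examples.
model.anomalyAddLabel(10, 20, "spike")                  # label records 10..20
labels = model.anomalyGetLabels(0, 100)                 # inspect a range
model.anomalyRemoveLabels(10, 20, labelFilter="spike")  # and undo it
# ----------------------------------------------------------------------------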
+ + :param start: (int) index to start label + :param end: (int) index to end label + :param labelName: (string) name of label """ self._getAnomalyClassifier().getSelf().addLabel(start, end, labelName) @@ -345,21 +365,14 @@ def anomalyAddLabel(self, start, end, labelName): def anomalyGetLabels(self, start, end): """ Get labels from the anomaly classifier within this model. + + :param start: (int) index to start getting labels + :param end: (int) index to end getting labels """ return self._getAnomalyClassifier().getSelf().getLabels(start, end) def run(self, inputRecord): - """ run one iteration of this model. - args: - inputRecord is a record object formatted according to - nupic.data.RecordStream.getNextRecordDict() result format. - - return: - An ModelResult class (see opf_utils.py) The contents of - ModelResult.inferences depends on the the specific inference - type of this model, which can be queried by getInferenceType() - """ assert not self.__restoringFromState assert inputRecord @@ -933,11 +946,9 @@ def _removeUnlikelyPredictions(cls, likelihoodsDict, minLikelihoodThreshold, def getRuntimeStats(self): - """ [virtual method override] get runtime statistics specific to this - model, i.e. activeCellOverlapAvg - - return: - a dict where keys are statistic names and values are the stats + """ + Only returns data for a stat called ``numRunCalls``. + :return: """ ret = {"numRunCalls" : self.__numRunCalls} @@ -956,16 +967,6 @@ def getRuntimeStats(self): def getFieldInfo(self, includeClassifierOnlyField=False): - """ [virtual method override] - Returns the sequence of FieldMetaInfo objects specifying this - Model's output; note that this may be different than the list of - FieldMetaInfo objects supplied at initialization (e.g., due to the - transcoding of some input fields into meta-fields, such as datetime - -> dayOfWeek, timeOfDay, etc.) - - Returns: List of FieldMetaInfo objects (see description above) - """ - encoder = self._getEncoder() fieldNames = encoder.getScalarNames() diff --git a/src/nupic/frameworks/opf/model.py b/src/nupic/frameworks/opf/model.py index 16d2fdf5ea..45152648cf 100644 --- a/src/nupic/frameworks/opf/model.py +++ b/src/nupic/frameworks/opf/model.py @@ -86,7 +86,10 @@ def finishLearning(self): @abstractmethod def resetSequenceStates(self): - """ Signal that the input record is the start of a new sequence. """ + """ + Signal that the input record is the start of a new sequence. Normally called + to force the delineation of a sequence, such as between OPF tasks. + """ @abstractmethod def getFieldInfo(self, includeClassifierOnlyField=False): From 219e8316982ac049b5a34091d7b9a808d7febf00 Mon Sep 17 00:00:00 2001 From: Matthew Taylor Date: Mon, 8 May 2017 13:11:20 -0700 Subject: [PATCH 06/16] Finished all OPF model docstrings --- docs/source/api/opf/models.rst | 16 ++++- src/nupic/frameworks/opf/model_factory.py | 31 +++++---- .../frameworks/opf/previous_value_model.py | 68 +++++-------------- src/nupic/frameworks/opf/two_gram_model.py | 59 +++------------- 4 files changed, 58 insertions(+), 116 deletions(-) diff --git a/docs/source/api/opf/models.rst b/docs/source/api/opf/models.rst index d9c94a993b..b7901ba9a8 100644 --- a/docs/source/api/opf/models.rst +++ b/docs/source/api/opf/models.rst @@ -1,6 +1,12 @@ Models ====== +ModelFactory +^^^^^^^^^^^^ + +.. autoclass:: nupic.frameworks.opf.model_factory.ModelFactory + :members: + Base Model ^^^^^^^^^^ @@ -29,12 +35,16 @@ HTMPredictionModel TwoGramModel ^^^^^^^^^^^^ +.. automodule:: nupic.frameworks.opf.two_gram_model + .. 
autoclass:: nupic.frameworks.opf.two_gram_model.TwoGramModel :members: :show-inheritance: -ModelFactory -^^^^^^^^^^^^ +PreviousValueModel +^^^^^^^^^^^^^^^^^^ -.. autoclass:: nupic.frameworks.opf.model_factory.ModelFactory +.. automodule:: nupic.frameworks.opf.previous_value_model + +.. autoclass:: nupic.frameworks.opf.previous_value_model.PreviousValueModel :members: diff --git a/src/nupic/frameworks/opf/model_factory.py b/src/nupic/frameworks/opf/model_factory.py index dc0dbb0e2b..4074bc8972 100644 --- a/src/nupic/frameworks/opf/model_factory.py +++ b/src/nupic/frameworks/opf/model_factory.py @@ -35,10 +35,9 @@ from previous_value_model import PreviousValueModel class ModelFactory(object): - """ Static factory class that produces a Model based on a description dict. - Eventually this will be the source for all Model creation, HTM and otherwise. - We may also implement building the description dict from a database or a - description.py file. For now, this is a very skeletal implementation. + """ + Static factory class that produces a :class:`nupic.frameworks.opf.model.Model` + based on a description dict. """ __logger = None @@ -46,7 +45,8 @@ class ModelFactory(object): @classmethod def __getLogger(cls): """ Get the logger for this object. - @returns (Logger) A Logger object. + + :returns: (Logger) A Logger object. """ if cls.__logger is None: cls.__logger = opf_utils.initLogger(cls) @@ -56,11 +56,16 @@ def __getLogger(cls): @staticmethod def create(modelConfig, logLevel=logging.ERROR): """ Create a new model instance, given a description dictionary. - @param modelConfig (dict) - A dictionary describing the current model (TODO: schema) - @param logLevel (int) The level of logging output that should be generated - @exception (Exception) Unsupported model type - @returns (nupic.frameworks.opf.model.Model) A model. + + :param modelConfig: (dict) + A dictionary describing the current model, + `described here <../../quick-start/example-model-params.html>`_. + + :param logLevel: (int) The level of logging output that should be generated + + :raises Exception: Unsupported model type + + :returns: :class:`nupic.frameworks.opf.model.Model` """ logger = ModelFactory.__getLogger() logger.setLevel(logLevel) @@ -82,9 +87,11 @@ def create(modelConfig, logLevel=logging.ERROR): @staticmethod def loadFromCheckpoint(savedModelDir, newSerialization=False): """ Load saved model. - @param savedModelDir (string) + + :param savedModelDir: (string) Directory of where the experiment is to be or was saved - @returns (nupic.frameworks.opf.model.Model) The loaded model instance. + :returns: (:class:`nupic.frameworks.opf.model.Model`) The loaded model + instance. """ if newSerialization: return HTMPredictionModel.readFromCheckpoint(savedModelDir) diff --git a/src/nupic/frameworks/opf/previous_value_model.py b/src/nupic/frameworks/opf/previous_value_model.py index 9aac0f3ec1..db5d651726 100644 --- a/src/nupic/frameworks/opf/previous_value_model.py +++ b/src/nupic/frameworks/opf/previous_value_model.py @@ -30,23 +30,29 @@ class PreviousValueModel(model.Model): - """Previous value model.""" + """ + Previous value model. + + :param inferenceType: (:class:`nupic.frameworks.opf.opf_utils.InferenceType`) + + :param fieldNames: a list of field names + + :param fieldTypes: a list of the types for the fields mentioned in + ``fieldNames`` + + :param predictedField: the field from ``fieldNames`` which is to be predicted + + :param predictionSteps: a list of steps for which a prediction is made. 
This is + only needed in the case of multi step predictions. For example, to get + predictions 1, 5, and 10 steps ahead: ``[1,5,10]``. + + """ def __init__(self, inferenceType=InferenceType.TemporalNextStep, fieldNames=[], fieldTypes=[], predictedField=None, predictionSteps=[]): - """ PVM constructor. - - inferenceType: An opf_utils.InferenceType value that specifies what type of - inference (i.e. TemporalNextStep, TemporalMultiStep, etc.) - fieldNames: a list of field names - fieldTypes: a list of the types for the fields mentioned in fieldNames - predictedField: the field from fieldNames which is to be predicted - predictionSteps: a list of steps for which a prediction is made. This is - only needed in the case of multi step predictions - """ super(PreviousValueModel, self).__init__(inferenceType) self._logger = opf_utils.initLogger(self) @@ -63,18 +69,6 @@ def __init__(self, inferenceType=InferenceType.TemporalNextStep, assert False, "Previous Value Model only works for next step or multi-step." def run(self, inputRecord): - """Run one iteration of this model. - - Args: - inputRecord: A record object formatted according to - nupic.data.FileSource.getNext() result format. - - Returns: - A ModelResult named tuple (see opf_utils.py). The contents of - ModelResult.inferences depends on the specific inference type of this - model, which can be queried by getInferenceType(). - TODO: Implement getInferenceType()? - """ # set the results. note that there is no translation to sensorInput results = super(PreviousValueModel, self).run(inputRecord) results.sensorInput = opf_utils.SensorInput(dataRow= \ @@ -97,8 +91,7 @@ def run(self, inputRecord): return results def finishLearning(self): - """Places the model in a permanent "finished learning" mode. - + """ The PVM does not learn, so this function has no effect. """ pass @@ -106,50 +99,25 @@ def finishLearning(self): def setFieldStatistics(self,fieldStats): """ - This method is used for the data source to communicate to the - model any statistics that it knows about the fields Since the PVM has no use for this information, this is a no-op """ pass def getFieldInfo(self): - """Returns the metadata specifying the format of the model's output. - - The result may be different than the list of - nupic.data.fieldmeta.FieldMetaInfo objects supplied at initialization due - to the transcoding of some input fields into meta- fields, such as - datetime -> dayOfWeek, timeOfDay, etc. - """ return tuple(field_meta.FieldMetaInfo(*args) for args in itertools.izip( self._fieldNames, self._fieldTypes, itertools.repeat(field_meta.FieldMetaSpecial.none))) def getRuntimeStats(self): - """Get the runtime statistics specific to the model. - - I.E. activeCellOverlapAvg - - Returns: - A dict mapping statistic names to values. - """ # TODO: Add debugging stats. # > what sort of stats are we supposed to return? return dict() def _getLogger(self): - """Get the logger created by this subclass. - - Returns: - A logging.Logger object. Should not be None. - """ return self._logger def resetSequenceStates(self): - """Called to indicate the start of a new sequence. - - The next call to run should not perform learning. 
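# --- Illustrative sketch (not part of the patch) ---------------------------
# Constructing the previous-value baseline with the parameters documented
# above; the field name and type values are examples.
from nupic.frameworks.opf.previous_value_model import PreviousValueModel
from nupic.frameworks.opf.opf_utils import InferenceType

pvm = PreviousValueModel(
    InferenceType.TemporalMultiStep,
    fieldNames=["consumption"],
    fieldTypes=["float"],
    predictedField="consumption",
    predictionSteps=[1, 5, 10])
# ----------------------------------------------------------------------------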
- """ self._reset = True def __getstate__(self): diff --git a/src/nupic/frameworks/opf/two_gram_model.py b/src/nupic/frameworks/opf/two_gram_model.py index 689e296e2c..f396ec30bc 100644 --- a/src/nupic/frameworks/opf/two_gram_model.py +++ b/src/nupic/frameworks/opf/two_gram_model.py @@ -32,16 +32,17 @@ class TwoGramModel(model.Model): - """Two-gram benchmark model.""" + """ + Two-gram benchmark model. + + :param inferenceType: (:class:`nupic.frameworks.opf.opf_utils.InferenceType`) + :param encoders: a dict of dicts, eventually sent to + :meth:`~nupic.encoders.multi.MultiEncoder.addMultipleEncoders` (see + there for details). + """ def __init__(self, inferenceType=InferenceType.TemporalNextStep, encoderParams=()): - """ Two-gram model constructor. - - inferenceType: An opf_utils.InferenceType value that specifies what type of - inference (i.e. TemporalNextStep, Classification, etc.) - encoders: Sequence of encoder params dictionaries. - """ super(TwoGramModel, self).__init__(inferenceType) self._logger = opf_utils.initLogger(self) @@ -54,18 +55,6 @@ def __init__(self, inferenceType=InferenceType.TemporalNextStep, self._twoGramDicts = [dict() for _ in xrange(len(self._fieldNames))] def run(self, inputRecord): - """Run one iteration of this model. - - Args: - inputRecord: A record object formatted according to - nupic.data.FileSource.getNext() result format. - - Returns: - A ModelResult named tuple (see opf_utils.py). The contents of - ModelResult.inferences depends on the specific inference type of this - model, which can be queried by getInferenceType(). - TODO: Implement getInferenceType()? - """ results = super(TwoGramModel, self).run(inputRecord) # Set up the lists of values, defaults, and encoded values. @@ -118,31 +107,15 @@ def run(self, inputRecord): return results def finishLearning(self): - """Places the model in a permanent "finished learning" mode. - - Once called, the model will not be able to learn from subsequent input - records. Learning may not be resumed on a given instance of the model once - this is called as the implementation may optimize itself by pruning data - structures that are necessary for learning. - """ self._learningEnabled = False def setFieldStatistics(self,fieldStats): """ - This method is used for the data source to communicate to the - model any statistics that it knows about the fields Since the two-gram has no use for this information, this is a no-op """ pass def getFieldInfo(self): - """Returns the metadata specifying the format of the model's output. - - The result may be different than the list of - nupic.data.fieldmeta.FieldMetaInfo objects supplied at initialization due - to the transcoding of some input fields into meta- fields, such as - datetime -> dayOfWeek, timeOfDay, etc. - """ fieldTypes = self._encoder.getDecoderOutputFieldTypes() assert len(self._fieldNames) == len(fieldTypes) @@ -152,29 +125,13 @@ def getFieldInfo(self): itertools.repeat(field_meta.FieldMetaSpecial.none))) def getRuntimeStats(self): - """Get the runtime statistics specific to the model. - - I.E. activeCellOverlapAvg - - Returns: - A dict mapping statistic names to values. - """ # TODO: Add debugging stats. return dict() def _getLogger(self): - """Get the logger created by this subclass. - - Returns: - A logging.Logger object. Should not be None. - """ return self._logger def resetSequenceStates(self): - """Called to indicate the start of a new sequence. - - The next call to run should not perform learning. 
- """ self._reset = True def __getstate__(self): From 227353b88d69a08f028101eb7fbc41206796faf4 Mon Sep 17 00:00:00 2001 From: Matthew Taylor Date: Mon, 8 May 2017 14:28:22 -0700 Subject: [PATCH 07/16] Updated doc README --- docs/README.md | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/docs/README.md b/docs/README.md index a5c1fa8c94..01861b6d69 100644 --- a/docs/README.md +++ b/docs/README.md @@ -124,9 +124,7 @@ nupic │   └── sparse_pass_through_encoder.py [OK] ├── frameworks │   ├── opf -│   │   ├── htm_prediction_model.py [TODO] -│   │   ├── htm_prediction_model_classifier_helper.py [TODO] -│   │   ├── htm_prediction_model_callbacks.py [TODO] +│   │   ├── htm_prediction_model.py [OK] │   │   ├── client.py [TODO] │   │   ├── common_models │   │   │   └── cluster_params.py [TODO] @@ -135,9 +133,8 @@ nupic │   │   ├── exp_description_helpers.py [TODO] │   │   ├── experiment_runner.py [TODO] │   │   ├── metrics.py [TODO] -│   │   ├── model.py [TODO] -│   │   ├── model_callbacks.py [TODO] -│   │   ├── model_factory.py [TODO] +│   │   ├── model.py [OK] +│   │   ├── model_factory.py [OK] │   │   ├── opf_basic_environment.py [TODO] │   │   ├── opf_environment.py [TODO] │   │   ├── opf_helpers.py [TODO] @@ -147,9 +144,9 @@ nupic │ │ │ └── SensorInput [OK] │   │   ├── periodic.py [TODO] │   │   ├── prediction_metrics_manager.py [TODO] -│   │   ├── previous_value_model.py [TODO] +│   │   ├── previous_value_model.py [OK] │   │   ├── safe_interpreter.py [TODO] -│   │   └── two_gram_model.py [TODO] +│   │   └── two_gram_model.py [OK] │   └── viz │   ├── dot_renderer.py [TODO] │   ├── examples From 7a93be5eb3b369f92e96e491a853d3a05a13945f Mon Sep 17 00:00:00 2001 From: Matthew Taylor Date: Mon, 8 May 2017 14:55:21 -0700 Subject: [PATCH 08/16] Docced nupic.data.utils --- docs/README.md | 4 +- docs/source/api/data/index.rst | 9 +++ src/nupic/data/utils.py | 106 ++++++++++++++++++++++++++------- 3 files changed, 94 insertions(+), 25 deletions(-) diff --git a/docs/README.md b/docs/README.md index a5c1fa8c94..103ff2ff64 100644 --- a/docs/README.md +++ b/docs/README.md @@ -98,14 +98,12 @@ nupic │   ├── fieldmeta.py [OK] │   ├── file_record_stream.py [OK] │   ├── inference_shifter.py [OK] -│   ├── joiner.py [TODO] -│   ├── jsonhelpers.py [TODO] │   ├── record_stream.py [TODO] │   ├── sorter.py [TODO] │   ├── stats.py [TODO] │   ├── stats_v2.py [TODO] │   ├── stream_reader.py [TODO] -│   └── utils.py [TODO] +│   └── utils.py [OK] ├── encoders │   ├── adaptivescalar.py [OK] │   ├── base.py [OK] diff --git a/docs/source/api/data/index.rst b/docs/source/api/data/index.rst index 9977e2b182..0f5b1cd3b5 100644 --- a/docs/source/api/data/index.rst +++ b/docs/source/api/data/index.rst @@ -12,3 +12,12 @@ FileRecordStream .. autoclass:: nupic.data.file_record_stream.FileRecordStream :members: + +Utilities +^^^^^^^^^ + +.. automodule:: nupic.data.utils + :members: + +.. autodata:: nupic.data.utils.DATETIME_FORMATS + :annotation: diff --git a/src/nupic/data/utils.py b/src/nupic/data/utils.py index a0df90c752..e405d74e10 100644 --- a/src/nupic/data/utils.py +++ b/src/nupic/data/utils.py @@ -25,31 +25,37 @@ import datetime import string -# Workaround for this error: -# "ImportError: Failed to import _strptime because the import lockis held by +# Workaround for this error: +# "ImportError: Failed to import _strptime because the import lockis held by # another thread" -# These are the supported timestamp formats to parse. The first is used for -# serializing datetimes. 
Functions in this file rely on specific formats from -# this tuple so be careful when changing the indices for existing formats. DATETIME_FORMATS = ('%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S:%f', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M', '%Y-%m-%d', '%m/%d/%Y %H:%M', '%m/%d/%y %H:%M', '%Y-%m-%dT%H:%M:%S.%fZ', '%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S') +""" +These are the supported timestamp formats to parse. The first is the format used +by NuPIC when serializing datetimes. +""" def parseTimestamp(s): - """Parses a textual datetime format and return a Python datetime object. + """ + Parses a textual datetime format and return a Python datetime object. + + The supported format is: ``yyyy-mm-dd h:m:s.ms`` + + The time component is optional. - The supported format is: yyyy-mm-dd h:m:s.ms + - hours are 00..23 (no AM/PM) + - minutes are 00..59 + - seconds are 00..59 + - micro-seconds are 000000..999999 - The time component is optional - hours are 00..23 (no AM/PM) - minutes are 00..59 - seconds are 00..59 - micro-seconds are 000000..999999 + :param s: (string) input time text + :return: (datetime.datetime) """ s = s.strip() for pattern in DATETIME_FORMATS: @@ -63,16 +69,34 @@ def parseTimestamp(s): def serializeTimestamp(t): + """ + Turns a datetime object into a string. + + :param t: (datetime.datetime) + :return: (string) in default format (see :const:`DATETIME_FORMATS`[0]) + """ return t.strftime(DATETIME_FORMATS[0]) def serializeTimestampNoMS(t): + """ + Turns a datetime object into a string ignoring milliseconds. + + :param t: (datetime.datetime) + :return: (string) in default format (see :const:`DATETIME_FORMATS`[2]) + """ return t.strftime(DATETIME_FORMATS[2]) def parseBool(s): + """ + String to boolean + + :param s: (string) + :return: (bool) + """ l = s.lower() if l in ("true", "t", "1"): return True @@ -83,6 +107,12 @@ def parseBool(s): def floatOrNone(f): + """ + Tries to convert input to a float input or returns ``None``. + + :param f: (object) thing to convert to a float + :return: (float or ``None``) + """ if f == 'None': return None return float(f) @@ -90,6 +120,12 @@ def floatOrNone(f): def intOrNone(i): + """ + Tries to convert input to a int input or returns ``None``. + + :param f: (object) thing to convert to a int + :return: (int or ``None``) + """ if i.strip() == 'None' or i.strip() == 'NULL': return None return int(i) @@ -97,13 +133,17 @@ def intOrNone(i): def escape(s): - """Escape commas, tabs, newlines and dashes in a string + """ + Escape commas, tabs, newlines and dashes in a string - Commas are encoded as tabs + Commas are encoded as tabs. + + :param s: (string) to escape + :returns: (string) escaped string """ if s is None: return '' - + assert isinstance(s, basestring), \ "expected %s but got %s; value=%s" % (basestring, type(s), s) s = s.replace('\\', '\\\\') @@ -115,9 +155,13 @@ def escape(s): def unescape(s): - """Unescapes a string that may contain commas, tabs, newlines and dashes + """ + Unescapes a string that may contain commas, tabs, newlines and dashes + + Commas are decoded from tabs. - Commas are decoded from tabs + :param s: (string) to unescape + :returns: (string) unescaped string """ assert isinstance(s, basestring) s = s.replace('\t', ',') @@ -130,7 +174,11 @@ def unescape(s): def parseSdr(s): - """Parses a string containing only 0's and 1's and return a Python list object. + """ + Parses a string containing only 0's and 1's and return a Python list object. 
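# --- Illustrative sketch (not part of the patch) ---------------------------
# Round-tripping values through the helpers documented in this module.
import datetime
from nupic.data.utils import (parseTimestamp, serializeTimestamp,
                              parseSdr, serializeSdr)

t = datetime.datetime(2017, 5, 8, 9, 26, 50)
assert parseTimestamp(serializeTimestamp(t)) == t  # default format round trip

sdr = [0, 1, 0, 1]
assert serializeSdr(sdr) == "0101"
assert parseSdr("0101") == sdr
# ----------------------------------------------------------------------------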
+ + :param s: (string) string to parse + :returns: (list) SDR out """ assert isinstance(s, basestring) sdr = [int(c) for c in s if c in ("0", "1")] @@ -143,7 +191,11 @@ def parseSdr(s): def serializeSdr(sdr): - """Serialize Python list object containing only 0's and 1's to string. + """ + Serialize Python list object containing only 0's and 1's to string. + + :param sdr: (list) binary + :returns: (string) SDR out """ return "".join(str(bit) for bit in sdr) @@ -151,13 +203,23 @@ def serializeSdr(sdr): def parseStringList(s): - """Parse a string of space-separated numbers, returning a Python list.""" + """ + Parse a string of space-separated numbers, returning a Python list. + + :param s: (string) to parse + :returns: (list) binary SDR + """ assert isinstance(s, basestring) return [int(i) for i in s.split()] def stripList(listObj): - """Convert a list of numbers to a string of space-separated numbers.""" + """ + Convert a list of numbers to a string of space-separated values. + + :param listObj: (list) to convert + :returns: (string) of space-separated values + """ return " ".join(str(i) for i in listObj) - + From 79f5f8152ca98c792f745218d37a3e26fe9e3067 Mon Sep 17 00:00:00 2001 From: Matthew Taylor Date: Mon, 8 May 2017 15:59:35 -0700 Subject: [PATCH 09/16] Docced RecordStream --- docs/source/api/data/index.rst | 8 ++ src/nupic/data/record_stream.py | 167 +++++++++++++++++++++----------- 2 files changed, 120 insertions(+), 55 deletions(-) diff --git a/docs/source/api/data/index.rst b/docs/source/api/data/index.rst index f0f57ba47f..10e598ea52 100644 --- a/docs/source/api/data/index.rst +++ b/docs/source/api/data/index.rst @@ -20,3 +20,11 @@ RecordStream .. autoclass:: nupic.data.record_stream.RecordStreamIface :members: + +StreamReader +^^^^^^^^^^^^ + +.. automodule:: nupic.data.stream_reader + +.. autoclass:: nupic.data.stream_reader.StreamReader + :members: diff --git a/src/nupic/data/record_stream.py b/src/nupic/data/record_stream.py index 5c65a4b387..992e3315ed 100644 --- a/src/nupic/data/record_stream.py +++ b/src/nupic/data/record_stream.py @@ -46,7 +46,7 @@ def _getFieldIndexBySpecial(fields, special): class ModelRecordEncoder(object): - """Encodes metric data input rows for consumption by OPF models. See + """Encodes metric data input rows for consumption by OPF models. See the `ModelRecordEncoder.encode` method for more details. """ @@ -212,7 +212,9 @@ def _computeTimestampRecordIdx(self, recordTS): class RecordStreamIface(object): - """This is the interface for the record input/output storage classes.""" + """ + This is the interface for the record input/output storage classes. + """ __metaclass__ = ABCMeta @@ -230,20 +232,18 @@ def close(self): def rewind(self): - """Put us back at the beginning of the file again) """ + """Put us back at the beginning of the file again. """ if self._modelRecordEncoder is not None: self._modelRecordEncoder.rewind() @abstractmethod def getNextRecord(self, useCache=True): - """Returns next available data record from the storage. If useCache is - False, then don't read ahead and don't cache any records. - - Raises nupic.support.exceptions.StreamDisappearedError if stream - disappears (e.g., gets garbage-collected). + """ + Returns next available data record from the storage. If ``useCache`` is + ``False``, then don't read ahead and don't cache any records. 
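# --- Illustrative sketch (not part of the patch) ---------------------------
# Draining a concrete RecordStreamIface implementation until End Of Stream.
# FileRecordStream is used here; "hotgym.csv" is an example path, and the
# None-on-EOS behavior follows the getNextRecord() contract above.
from nupic.data.file_record_stream import FileRecordStream

stream = FileRecordStream("hotgym.csv")
fieldNames = stream.getFieldNames()
rowCount = 0
row = stream.getNextRecord()
while row is not None:  # None means End of Stream (EOS)
  rowCount += 1
  row = stream.getNextRecord()
stream.close()
# ----------------------------------------------------------------------------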
- retval: a data row (a list or tuple) if available; None, if no more records + :return: a data row (a list or tuple) if available; None, if no more records in the table (End of Stream - EOS); empty sequence (list or tuple) when timing out while waiting for the next record. """ @@ -252,9 +252,10 @@ def getNextRecord(self, useCache=True): def getNextRecordDict(self): """Returns next available data record from the storage as a dict, with the keys being the field names. This also adds in some meta fields: - '_category': The value from the category field (if any) - '_reset': True if the reset field was True (if any) - '_sequenceId': the value from the sequenceId field (if any) + + - ``_category``: The value from the category field (if any) + - ``_reset``: True if the reset field was True (if any) + - ``_sequenceId``: the value from the sequenceId field (if any) """ @@ -275,51 +276,57 @@ def getNextRecordDict(self): def getAggregationMonthsAndSeconds(self): - """ Returns the aggregation period of the record stream as a dict + """ + Returns the aggregation period of the record stream as a dict containing 'months' and 'seconds'. The months is always an integer and seconds is a floating point. Only one is allowed to be non-zero. If there is no aggregation associated with the stream, returns None. Typically, a raw file or hbase stream will NOT have any aggregation info, - but subclasses of RecordStreamIFace, like StreamReader, will and will - return the aggregation period from this call. This call is used by the - getNextRecordDict() method to assign a record number to a record given - its timestamp and the aggregation interval + but subclasses of RecordStreamIFace, like :class:`~nupic.data.StreamReader`, + will and will return the aggregation period from this call. This call is + used by the :meth:`getNextRecordDict` method to assign a record number to a + record given its timestamp and the aggregation interval - Parameters: - ------------------------------------------------------------------------ - retval: aggregationPeriod (as a dict) or None - 'months': number of months in aggregation period - 'seconds': number of seconds in aggregation period (as a float) + :returns: ``None`` """ return None @abstractmethod def getRecordsRange(self, bookmark=None, range=None): - """Returns a range of records, starting from the bookmark. If 'bookmark' + """ + Returns a range of records, starting from the bookmark. If 'bookmark' is None, then records read from the first available. If 'range' is None, all available records will be returned (caution: this could be a lot of records and require a lot of memory). + + :param bookmark: represents a bookmark to start from + :param range: (int) how many to get + :return: (list) of records """ @abstractmethod def getNextRecordIdx(self): - """Returns the index of the record that will be read next from - getNextRecord() + """ + :returns: (int) index of the record that will be read next from + :meth:`getNextRecord` """ @abstractmethod def getLastRecords(self, numRecords): - """Returns a tuple (successCode, recordsArray), where - successCode - if the stream had enough records to return, True/False - recordsArray - an array of last numRecords records available when - the call was made. 
Records appended while in the
+        getLastRecords will not be returned until the next
+        call to either getNextRecord() or getLastRecords()
     """

   @abstractmethod
   def removeOldData(self):

   @abstractmethod
   def appendRecord(self, record, inputRef=None):
-    """Saves the record in the underlying storage."""
+    """
+    Saves the record in the underlying storage. Should be implemented in
+    subclasses.
+
+    :param record: (object) to store
+    """

   @abstractmethod
   def appendRecords(self, records, inputRef=None, progressCB=None):
-    """Saves multiple records in the underlying storage."""
+    """
+    Saves multiple records in the underlying storage. Should be implemented in
+    subclasses.
+
+    :param records: (list) of objects to store
+    :param progressCB: (func) called after each record is appended
+    """

   @abstractmethod
   def getBookmark(self):
     """ anchor to the constructor makes the current position to be the first
     returned record. If record is no longer in the storage, the first
     available after it will be returned.
+
+    :returns: anchor to current position in the data.
     """

   @abstractmethod
   def recordsExistAfter(self, bookmark):
-    """Returns True iff there are records left after the bookmark."""
+    """
+    :param bookmark: (int) where to start
+    :returns: True if there are records left after the bookmark.
+    """

   @abstractmethod
   def seekFromEnd(self, numRecords):
-    """Returns a bookmark numRecords from the end of the stream."""
+    """
+    :param numRecords: (int) number of records from the end.
+    :returns: (int) a bookmark numRecords from the end of the stream.
+    """

   @abstractmethod
   def getStats(self):
-    """Returns storage stats (like min and max values of the fields)."""
+    """
+    :returns: storage stats (like min and max values of the fields).
+    """

   def getFieldMin(self, fieldName):
-    """ Returns current minimum value for the field 'fieldName'.
-
+    """
     If underlying implementation does not support min/max stats collection,
     or if a field type does not support min/max (non scalars), the return
     value will be None.
+
+    :param fieldName: (string) name of field to get min
+    :returns: current minimum value for the field ``fieldName``.
     """
     stats = self.getStats()
     if stats == None:

   def getFieldMax(self, fieldName):
-    """ Returns current maximum value for the field 'fieldName'.
-
+    """
     If underlying implementation does not support min/max stats collection,
     or if a field type does not support min/max (non scalars), the return
     value will be None.
+
+    :param fieldName: (string) name of field to get max
+    :returns: current maximum value for the field ``fieldName``.
     """
     stats = self.getStats()
     if stats == None:

   def clearStats(self):

   @abstractmethod
   def getError(self):
-    """Returns errors saved in the storage."""
+    """:returns: errors saved in the storage."""

   @abstractmethod
   def setError(self, error):
-    """Saves specified error in the storage."""
+    """
+    Saves specified error in the storage.
+
+    :param error: Error to store.
+ """ @abstractmethod def isCompleted(self): - """Returns True if all records are already in the storage or False - if more records is expected. + """ + :returns: True if all records are already in the storage or False + if more records is expected. """ @abstractmethod def setCompleted(self, completed): - """Marks the stream completed (True or False).""" + """ + Marks the stream completed. + + :param completed: (bool) is completed? + """ @abstractmethod def getFieldNames(self): - """Returns an array of field names associated with the data.""" + """ + :returns: (list) of field names associated with the data. + """ @abstractmethod def getFields(self): - """Returns a sequence of nupic.data.fieldmeta.FieldMetaInfo - name/type/special tuples for each field in the stream. Might be None, if - that information is provided externally (thru stream def, for example). + """ + :returns: (list) of :class:`nupic.data.fieldmeta.FieldMetaInfo`objects for + each field in the stream. Might be None, if that information is provided + externally (thru stream def, for example). """ def getResetFieldIdx(self): """ - :returns: index of the 'reset' field; None if no such field. """ + :returns: (int) index of the ``reset`` field; ``None`` if no such field. + """ return _getFieldIndexBySpecial(self.getFields(), FieldMetaSpecial.reset) def getTimestampFieldIdx(self): - """ Return index of the 'timestamp' field. """ + """ + :returns: (int) index of the ``timestamp`` field. + """ return _getFieldIndexBySpecial(self.getFields(), FieldMetaSpecial.timestamp) def getSequenceIdFieldIdx(self): - """ Return index of the 'sequenceId' field. """ + """ + :returns: (int) index of the ``sequenceId`` field. + """ return _getFieldIndexBySpecial(self.getFields(), FieldMetaSpecial.sequence) def getCategoryFieldIdx(self): - """ Return index of the 'category' field. """ + """ + :returns: (int) index of ``category`` field + """ return _getFieldIndexBySpecial(self.getFields(), FieldMetaSpecial.category) def getLearningFieldIdx(self): - """ Return index of the 'learning' field. """ + """ + :returns: (int) index of the ``learning ``field. + """ return _getFieldIndexBySpecial(self.getFields(), FieldMetaSpecial.learning) @abstractmethod def setTimeout(self, timeout): - """ Set the read timeout in seconds (int or floating point) """ + """ + Set the read timeout in seconds + + :param timeout: (int or floating point) + """ @abstractmethod From 3c77ac2774ee6ec240420cb8d780711ecc29d9e3 Mon Sep 17 00:00:00 2001 From: Matthew Taylor Date: Mon, 8 May 2017 17:10:53 -0700 Subject: [PATCH 10/16] Docced stream reader --- docs/README.md | 2 +- src/nupic/data/stream_reader.py | 205 +++++++++++++++++--------------- 2 files changed, 107 insertions(+), 100 deletions(-) diff --git a/docs/README.md b/docs/README.md index 103ff2ff64..cd74b457a4 100644 --- a/docs/README.md +++ b/docs/README.md @@ -102,7 +102,7 @@ nupic │   ├── sorter.py [TODO] │   ├── stats.py [TODO] │   ├── stats_v2.py [TODO] -│   ├── stream_reader.py [TODO] +│   ├── stream_reader.py [OK] │   └── utils.py [OK] ├── encoders │   ├── adaptivescalar.py [OK] diff --git a/src/nupic/data/stream_reader.py b/src/nupic/data/stream_reader.py index 60f1984ab9..a9a40e5ee2 100644 --- a/src/nupic/data/stream_reader.py +++ b/src/nupic/data/stream_reader.py @@ -50,86 +50,88 @@ class StreamTimeoutException(Exception): class StreamReader(RecordStreamIface): """ Implements a stream reader. This is a high level class that owns one or more - underlying implementations of a RecordStreamIFace. 
Each RecordStreamIFace - implements the raw reading of records from the record store (which could be a - file, hbase table or something else). + underlying implementations of a + :class:`~nupic.data.record_stream.RecordStreamIFace`. Each + :class:`~nupic.data.record_stream.RecordStreamIFace` implements the raw + reading of records from the record store (which could be a file, hbase table + or something else). - In the future, we will support joining of two or more RecordStreamIface's ( - which is why the streamDef accepts a list of 'stream' elements), but for now - only 1 source is supported. + In the future, we will support joining of two or more + :class:`~nupic.data.record_stream.RecordStreamIFace`'s (which is why the + ``streamDef`` accepts a list of 'stream' elements), but for now only 1 source + is supported. The class also implements aggregation of the (in the future) joined records from the sources. This module parses the stream definition (as defined in - /nupic/frameworks/opf/jsonschema/stream_def.json), creates the - RecordStreamIFace for each source ('stream's element) defined in the stream - def, performs aggregation, and returns each record in the correct format - according to the desired column names specified in the streamDef. + ``/src/nupic/frameworks/opf/jsonschema/stream_def.json``), creates the + :class:`~nupic.data.record_stream.RecordStreamIFace` for each source + ('stream' element) defined in the stream def, performs aggregation, and + returns each record in the correct format according to the desired column + names specified in the streamDef. - This class implements the RecordStreamIFace interface and thus can be used - in place of a raw record stream. + This class implements the :class:`~nupic.data.record_stream.RecordStreamIFace` + interface and thus can be used in place of a raw record stream. This is an example streamDef: - { - 'version': 1 - 'info': 'test_hotgym', - - 'streams': [ - {'columns': [u'*'], - 'info': u'hotGym.csv', - 'last_record': 4000, - 'source': u'file://extra/hotgym/hotgym.csv'}. - ], - - 'timeField': 'timestamp', - - 'aggregation': { - 'hours': 1, - 'fields': [ - ('timestamp', 'first'), - ('gym', 'first'), - ('consumption', 'sum') - ], - } - - } - """ + .. code-block:: python + { + 'version': 1 + 'info': 'test_hotgym', - def __init__(self, streamDef, bookmark=None, saveOutput=False, - isBlocking=True, maxTimeout=0, eofOnTimeout=False): - """ Base class constructor, performs common initialization + 'streams': [ + {'columns': [u'*'], + 'info': u'hotGym.csv', + 'last_record': 4000, + 'source': u'file://extra/hotgym/hotgym.csv'}. + ], - Parameters: - ---------------------------------------------------------------- - streamDef: The stream definition, potentially containing multiple sources - (not supported yet). See - /nupic/frameworks/opf/jsonschema/stream_def.json for the format - of this dict + 'timeField': 'timestamp', - bookmark: Bookmark to start reading from. This overrides the first_record - field of the streamDef if provided. + 'aggregation': { + 'hours': 1, + 'fields': [ + ('timestamp', 'first'), + ('gym', 'first'), + ('consumption', 'sum') + ], + } - saveOutput: If true, save the output to a csv file in a temp directory. - The path to the generated file can be found in the log - output. + } - isBlocking: should read operation block *forever* if the next row of data - is not available, but the stream is not marked as 'completed' - yet? 
-    maxTimeout: if isBlocking is False, max seconds to wait for more data before
-                timing out; ignored when isBlocking is True.
-
-    eofOnTimeout: If True and we get a read timeout (isBlocking must be False
-                  to get read timeouts), assume we've reached the end of the
-                  input and produce the last aggregated record, if one can be
-                  completed.
-
-    """
+
+  :param streamDef: The stream definition, potentially containing multiple
+      sources (not supported yet). See
+      ``src/nupic/frameworks/opf/jsonschema/stream_def.json`` for the
+      format of this dict.
+
+  :param bookmark: Bookmark to start reading from. This overrides the
+      first_record field of the streamDef if provided.
+
+  :param saveOutput: If true, save the output to a csv file in a temp
+      directory. The path to the generated file can be found in the log
+      output.
+
+  :param isBlocking: should read operation block *forever* if the next row of
+      data is not available, but the stream is not marked as 'completed'
+      yet?
+
+  :param maxTimeout: if isBlocking is False, max seconds to wait for more data
+      before timing out; ignored when isBlocking is True.
+
+  :param eofOnTimeout: If True and we get a read timeout (isBlocking must be
+      False to get read timeouts), assume we've reached the end of the
+      input and produce the last aggregated record, if one can be
+      completed.
+
+  """
+
+  def __init__(self, streamDef, bookmark=None, saveOutput=False,
+               isBlocking=True, maxTimeout=0, eofOnTimeout=False):

    # Call superclass constructor
    super(StreamReader, self).__init__()

@@ -304,7 +306,8 @@ def close(self):

  def getNextRecord(self):
    """ Returns combined data from all sources (values only).
-    Returns None on EOF; empty sequence on timeout.
+
+    :returns: None on EOF; empty sequence on timeout.
    """


@@ -370,7 +373,8 @@ def getNextRecord(self):

  def getDataRowCount(self):
-    """Iterates through stream to calculate total records after aggregation.
+    """
+    Iterates through stream to calculate total records after aggregation.
    This will alter the bookmark state.
    """
    inputRowCountAfterAggregation = 0
@@ -385,28 +389,25 @@ def getDataRowCount(self):


  def getLastRecords(self, numRecords):
-    """Saves the record in the underlying storage."""
    raise RuntimeError("Not implemented in StreamReader")


  def getRecordsRange(self, bookmark=None, range=None):
-    """ Returns a range of records, starting from the bookmark. If 'bookmark'
-    is None, then records read from the first available. If 'range' is
-    None, all available records will be returned (caution: this could be
-    a lot of records and require a lot of memory).
-    """
    raise RuntimeError("Not implemented in StreamReader")


  def getNextRecordIdx(self):
-    """Returns the index of the record that will be read next from
-    getNextRecord()
+    """
+    :returns: the index of the record that will be read next from
+      :meth:`getNextRecord`.
    """
    return self._recordCount


  def recordsExistAfter(self, bookmark):
-    """Returns True iff there are records left after the bookmark."""
+    """
+    :returns: True if there are records left after the bookmark.
+    """
    return self._recordStore.recordsExistAfter(bookmark)


@@ -416,30 +417,25 @@ def getAggregationMonthsAndSeconds(self):
    seconds is a floating point. Only one is allowed to be non-zero at a
    time.

-    If there is no aggregation associated with the stream, returns None.
+    The aggregation period is returned from this call. It is used by the
+    :meth:`nupic.data.record_stream.RecordStreamIface.getNextRecordDict`
+    method to assign a record number to a record given its timestamp and the
+    aggregation interval. 
- Typically, a raw file or hbase stream will NOT have any aggregation info, - but subclasses of RecordStreamIFace, like StreamReader, will and will - return the aggregation period from this call. This call is used by the - getNextRecordDict() method to assign a record number to a record given - its timestamp and the aggregation interval + :returns: aggregationPeriod (as a dict) where: - Parameters: - ------------------------------------------------------------------------ - retval: aggregationPeriod (as a dict) or None - 'months': number of months in aggregation period - 'seconds': number of seconds in aggregation period (as a float) + - ``months``: number of months in aggregation period + - ``seconds``: number of seconds in aggregation period + (as a float) """ return self._aggMonthsAndSeconds def appendRecord(self, record, inputRef=None): - """Saves the record in the underlying storage.""" raise RuntimeError("Not implemented in StreamReader") def appendRecords(self, records, inputRef=None, progressCB=None): - """Saves multiple records in the underlying storage.""" raise RuntimeError("Not implemented in StreamReader") @@ -448,28 +444,29 @@ def removeOldData(self): def seekFromEnd(self, numRecords): - """Seeks to numRecords from the end and returns a bookmark to the new - position. - """ raise RuntimeError("Not implemented in StreamReader") def getFieldNames(self): - """ Returns all fields in all inputs (list of plain names). - NOTE: currently, only one input is supported + """ + Returns all fields in all inputs (list of plain names). + + .. note:: currently, only one input is supported """ return [f.name for f in self._streamFields] def getFields(self): - """ Returns a sequence of nupic.data.fieldmeta.FieldMetaInfo - name/type/special tuples for each field in the stream. + """ + :returns: a sequence of :class:`nupic.data.fieldmeta.FieldMetaInfo` for each + field in the stream. """ return self._streamFields def getBookmark(self): - """ Returns a bookmark to the current position + """ + :returns: a bookmark to the current position """ return self._aggBookmark @@ -481,10 +478,11 @@ def clearStats(self): def getStats(self): - """ Returns stats (like min and max values of the fields). - + """ TODO: This method needs to be enhanced to get the stats on the *aggregated* records. + + :returns: stats (like min and max values of the fields). """ # The record store returns a dict of stats, each value in this dict is @@ -508,37 +506,46 @@ def getStats(self): def getError(self): - """ Returns errors saved in the stream. + """ + :returns: errors saved in the stream. """ return self._recordStore.getError() def setError(self, error): """ Saves specified error in the stream. + + :param error: to save """ self._recordStore.setError(error) def isCompleted(self): - """ Returns True if all records have been read. + """ + :returns: True if all records have been read. """ return self._recordStore.isCompleted() def setCompleted(self, completed=True): - """ Marks the stream completed (True or False) + """ + Marks the stream completed (True or False) + + :param completed: (bool) is completed or not """ # CSV file is always considered completed, nothing to do self._recordStore.setCompleted(completed) def setTimeout(self, timeout): - """ Set the read timeout """ + """Set the read timeout. 
+ + :param timeout: (float or int) timeout length + """ self._recordStore.setTimeout(timeout) def flush(self): - """ Flush the file to disk """ raise RuntimeError("Not implemented in StreamReader") From 767be5ae37296a01ac4eee405f1d329904a7df96 Mon Sep 17 00:00:00 2001 From: Matthew Taylor Date: Mon, 8 May 2017 17:46:33 -0700 Subject: [PATCH 11/16] Updated doc readme for nupic.data updates. --- docs/README.md | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/docs/README.md b/docs/README.md index d258f84988..2a9f8ce80b 100644 --- a/docs/README.md +++ b/docs/README.md @@ -93,15 +93,10 @@ nupic │   ├── temporal_memory.py [TODO] │   └── temporal_memory_shim.py [TODO] ├── data -│   ├── aggregator.py [DEFER] -│   ├── dictutils.py [DEFER] │   ├── fieldmeta.py [OK] │   ├── file_record_stream.py [OK] │   ├── inference_shifter.py [OK] -│   ├── record_stream.py [TODO] -│   ├── sorter.py [TODO] -│   ├── stats.py [TODO] -│   ├── stats_v2.py [TODO] +│   ├── record_stream.py [OK] │   ├── stream_reader.py [OK] │   └── utils.py [OK] ├── encoders From 08541d3107b71c2701ad976d0780d7bb63c4fb6e Mon Sep 17 00:00:00 2001 From: Matthew Taylor Date: Mon, 8 May 2017 18:08:22 -0700 Subject: [PATCH 12/16] Remove unused argument from appendRecord --- src/nupic/data/file_record_stream.py | 9 ++------- src/nupic/data/record_stream.py | 4 ++-- src/nupic/data/stream_reader.py | 4 ++-- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/src/nupic/data/file_record_stream.py b/src/nupic/data/file_record_stream.py index 8344e97b95..0f8ec6e9e9 100644 --- a/src/nupic/data/file_record_stream.py +++ b/src/nupic/data/file_record_stream.py @@ -399,7 +399,7 @@ def removeOldData(self): raise Exception('removeOldData is not supported in this class.') - def appendRecord(self, record, inputBookmark=None): + def appendRecord(self, record): """ Saves the record in the underlying csv file. @@ -430,20 +430,15 @@ def appendRecord(self, record, inputBookmark=None): self._recordCount += 1 - def appendRecords(self, records, inputRef=None, progressCB=None): + def appendRecords(self, records, progressCB=None): """ Saves multiple records in the underlying storage. :param records: array of records as in :meth:`~.FileRecordStream.appendRecord` - :param inputRef: reference to the corresponding input (not applicable - in case of a file storage) :param progressCB: (function) callback to report progress """ - # input ref is not applicable in case of a file storage - inputRef = inputRef - for record in records: self.appendRecord(record, None) if progressCB is not None: diff --git a/src/nupic/data/record_stream.py b/src/nupic/data/record_stream.py index 992e3315ed..298998b8f5 100644 --- a/src/nupic/data/record_stream.py +++ b/src/nupic/data/record_stream.py @@ -336,7 +336,7 @@ def removeOldData(self): @abstractmethod - def appendRecord(self, record, inputRef=None): + def appendRecord(self, record): """ Saves the record in the underlying storage. Should be implemented in subclasses. @@ -346,7 +346,7 @@ def appendRecord(self, record, inputRef=None): @abstractmethod - def appendRecords(self, records, inputRef=None, progressCB=None): + def appendRecords(self, records, progressCB=None): """ Saves multiple records in the underlying storage. Should be implemented in subclasses. 
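With ``inputRef`` removed, the write-side contract reduces to the record itself plus an optional per-append progress callback. A sketch of the updated call pattern against ``FileRecordStream`` (illustrative only: the path and field layout are invented, and the constructor arguments shown are assumed from this era of the codebase):

.. code-block:: python

    from nupic.data.fieldmeta import (FieldMetaInfo, FieldMetaType,
                                      FieldMetaSpecial)
    from nupic.data.file_record_stream import FileRecordStream

    # Hypothetical two-field layout; appendRecord() takes values in field
    # order.
    fields = [
      FieldMetaInfo("gym", FieldMetaType.string, FieldMetaSpecial.none),
      FieldMetaInfo("consumption", FieldMetaType.float, FieldMetaSpecial.none),
    ]
    rows = [["Balgowlah Platinum", 5.5],
            ["Balgowlah Platinum", 6.1]]

    out = FileRecordStream("/tmp/example.csv", write=True, fields=fields)
    out.appendRecord(rows[0])                   # one record at a time
    out.appendRecords(rows[1:],                 # or in bulk, with an optional
                      progressCB=lambda: None)  # zero-argument callback
    out.close()
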
diff --git a/src/nupic/data/stream_reader.py b/src/nupic/data/stream_reader.py index a9a40e5ee2..7f8e45e2a0 100644 --- a/src/nupic/data/stream_reader.py +++ b/src/nupic/data/stream_reader.py @@ -431,11 +431,11 @@ def getAggregationMonthsAndSeconds(self): return self._aggMonthsAndSeconds - def appendRecord(self, record, inputRef=None): + def appendRecord(self, record): raise RuntimeError("Not implemented in StreamReader") - def appendRecords(self, records, inputRef=None, progressCB=None): + def appendRecords(self, records, progressCB=None): raise RuntimeError("Not implemented in StreamReader") From 56800d4604fabef5eafc5355c1d703ff4816532f Mon Sep 17 00:00:00 2001 From: Matthew Taylor Date: Mon, 8 May 2017 18:45:27 -0700 Subject: [PATCH 13/16] Fixed broken test after removing appendRecord param --- src/nupic/data/file_record_stream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nupic/data/file_record_stream.py b/src/nupic/data/file_record_stream.py index 0f8ec6e9e9..1f1eb92c0a 100644 --- a/src/nupic/data/file_record_stream.py +++ b/src/nupic/data/file_record_stream.py @@ -440,7 +440,7 @@ def appendRecords(self, records, progressCB=None): """ for record in records: - self.appendRecord(record, None) + self.appendRecord(record) if progressCB is not None: progressCB() From 66fc8308b7f511b1f8391a0887772f0c148f604b Mon Sep 17 00:00:00 2001 From: Matthew Taylor Date: Tue, 9 May 2017 13:04:57 -0700 Subject: [PATCH 14/16] Missed a merge --- src/nupic/data/record_stream.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/nupic/data/record_stream.py b/src/nupic/data/record_stream.py index 515bc0f46d..f42315fba3 100644 --- a/src/nupic/data/record_stream.py +++ b/src/nupic/data/record_stream.py @@ -313,11 +313,7 @@ def appendRecord(self, record, inputRef=None): @abstractmethod -<<<<<<< HEAD def appendRecords(self, records, progressCB=None): -======= - def appendRecords(self, records, inputRef=None, progressCB=None): ->>>>>>> 3684bbd96d63dc12b7123a859e92d5980cb2fc95 """ Saves multiple records in the underlying storage. Should be implemented in subclasses. @@ -367,11 +363,7 @@ def getFieldMin(self, fieldName): or if a field type does not support min/max (non scalars), the return value will be None. -<<<<<<< HEAD - :param fieldName: (string) name of field to get max -======= :param fieldName: (string) name of field to get min ->>>>>>> 3684bbd96d63dc12b7123a859e92d5980cb2fc95 :returns: current minimum value for the field ``fieldName``. """ stats = self.getStats() @@ -449,15 +441,9 @@ def getFieldNames(self): @abstractmethod def getFields(self): """ -<<<<<<< HEAD - :returns: (list) of :class:`nupic.data.fieldmeta.FieldMetaInfo`objects for - each field in the stream. Might be None, if that information is provided - externally (thru stream def, for example). -======= :returns: (list) of :class:`nupic.data.fieldmeta.FieldMetaInfo` objects for each field in the stream. Might be None, if that information is provided externally (through stream def, for example). ->>>>>>> 3684bbd96d63dc12b7123a859e92d5980cb2fc95 """ @@ -491,11 +477,7 @@ def getCategoryFieldIdx(self): def getLearningFieldIdx(self): """ -<<<<<<< HEAD - :returns: (int) index of the ``learning ``field. -======= :returns: (int) index of the ``learning`` field. 
->>>>>>> 3684bbd96d63dc12b7123a859e92d5980cb2fc95 """ return _getFieldIndexBySpecial(self.getFields(), FieldMetaSpecial.learning) From 25215e34b60e47dd69d8b3830172014313d712ff Mon Sep 17 00:00:00 2001 From: Matthew Taylor Date: Tue, 9 May 2017 13:09:01 -0700 Subject: [PATCH 15/16] Fixing up links in RST docs --- src/nupic/data/record_stream.py | 9 +++++---- src/nupic/data/stream_reader.py | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/nupic/data/record_stream.py b/src/nupic/data/record_stream.py index f42315fba3..6aed1e777f 100644 --- a/src/nupic/data/record_stream.py +++ b/src/nupic/data/record_stream.py @@ -284,10 +284,11 @@ def getAggregationMonthsAndSeconds(self): If there is no aggregation associated with the stream, returns None. Typically, a raw file or hbase stream will NOT have any aggregation info, - but subclasses of RecordStreamIFace, like :class:`~nupic.data.StreamReader`, - will and will return the aggregation period from this call. This call is - used by the :meth:`getNextRecordDict` method to assign a record number to a - record given its timestamp and the aggregation interval + but subclasses of :class:`~nupic.data.record_stream.RecordStreamIface`, like + :class:`~nupic.data.stream_reader.StreamReader`, will and will return the + aggregation period from this call. This call is used by + :meth:`getNextRecordDict` to assign a record number to a record given its + timestamp and the aggregation interval. :returns: ``None`` """ diff --git a/src/nupic/data/stream_reader.py b/src/nupic/data/stream_reader.py index 49154ab362..b1d65d4d55 100644 --- a/src/nupic/data/stream_reader.py +++ b/src/nupic/data/stream_reader.py @@ -51,13 +51,13 @@ class StreamReader(RecordStreamIface): """ Implements a stream reader. This is a high level class that owns one or more underlying implementations of a - :class:`~nupic.data.record_stream.RecordStreamIFace`. Each - :class:`~nupic.data.record_stream.RecordStreamIFace` implements the raw + :class:`~nupic.data.record_stream.RecordStreamIface`. Each + :class:`~nupic.data.record_stream.RecordStreamIface` implements the raw reading of records from the record store (which could be a file, hbase table or something else). In the future, we will support joining of two or more - :class:`~nupic.data.record_stream.RecordStreamIFace`'s (which is why the + :class:`~nupic.data.record_stream.RecordStreamIface`'s (which is why the ``streamDef`` accepts a list of 'stream' elements), but for now only 1 source is supported. From 245feb6cd8f4c81eb57e7c5e3ccc8e98fd0538c5 Mon Sep 17 00:00:00 2001 From: Matthew Taylor Date: Tue, 9 May 2017 13:16:41 -0700 Subject: [PATCH 16/16] Removed dangling references to inputRef / bookmark --- src/nupic/data/file_record_stream.py | 2 +- src/nupic/data/record_stream.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nupic/data/file_record_stream.py b/src/nupic/data/file_record_stream.py index b81439ecf1..ea79618db6 100644 --- a/src/nupic/data/file_record_stream.py +++ b/src/nupic/data/file_record_stream.py @@ -387,7 +387,7 @@ def getNextRecord(self, useCache=True): return record - def appendRecord(self, record, inputBookmark=None): + def appendRecord(self, record): """ Saves the record in the underlying csv file. 
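The ``getAggregationMonthsAndSeconds`` docstring reworked above explains that the aggregation period is what lets ``getNextRecordDict`` assign a record number to a record from its timestamp. A rough sketch of that bookkeeping for a purely seconds-based period (an illustration of the idea only, not the library's actual implementation):

.. code-block:: python

    import calendar
    from datetime import datetime

    def recordNumber(timestamp, aggSeconds):
      # Timestamps falling inside the same aggregation bucket (e.g. the same
      # hour when aggSeconds == 3600) map to the same record number.
      epochSeconds = calendar.timegm(timestamp.timetuple())
      return int(epochSeconds // aggSeconds)

    assert (recordNumber(datetime(2017, 5, 8, 9, 10), 3600) ==
            recordNumber(datetime(2017, 5, 8, 9, 50), 3600))
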
diff --git a/src/nupic/data/record_stream.py b/src/nupic/data/record_stream.py index 6aed1e777f..8c065ca6af 100644 --- a/src/nupic/data/record_stream.py +++ b/src/nupic/data/record_stream.py @@ -304,7 +304,7 @@ def getNextRecordIdx(self): @abstractmethod - def appendRecord(self, record, inputRef=None): + def appendRecord(self, record): """ Saves the record in the underlying storage. Should be implemented in subclasses.
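
After this series, ``appendRecord(record)`` and ``appendRecords(records, progressCB=None)`` are the whole write-side surface. A toy in-memory stand-in that honors the documented contract (hypothetical; a real subclass of ``RecordStreamIface`` would also have to implement the remaining abstract methods):

.. code-block:: python

    class ListRecordStream(object):
      """Toy stand-in that honors the documented append/read contract."""

      def __init__(self):
        self._records = []
        self._cursor = 0

      def appendRecord(self, record):
        # Saves the record in the underlying storage (here, a plain list).
        self._records.append(record)

      def appendRecords(self, records, progressCB=None):
        for record in records:
          self.appendRecord(record)
          if progressCB is not None:
            progressCB()

      def getNextRecord(self):
        # Returns None on EOF, mirroring the documented behavior.
        if self._cursor >= len(self._records):
          return None
        record = self._records[self._cursor]
        self._cursor += 1
        return record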