Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix SDR classifier Region #3669

Merged
merged 6 commits into from
Jun 7, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 66 additions & 43 deletions src/nupic/algorithms/sdr_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ def compute(self, recordNum, patternNZ, classification, learn, infer):

:param classification: Dict of the classification information where:

- bucketIdx: index of the encoder bucket
- actValue: actual value going into the encoder
- bucketIdx: list of indices of the encoder bucket
- actValue: list of actual values going into the encoder

Classification could be None for inference mode.
:param learn: (bool) if true, learn this sample
Expand Down Expand Up @@ -204,8 +204,15 @@ def compute(self, recordNum, patternNZ, classification, learn, infer):
print " patternNZ (%d):" % len(patternNZ), patternNZ
print " classificationIn:", classification

# Store pattern in our history
self._patternNZHistory.append((recordNum, patternNZ))
# Ensure that recordNum never decreases (a repeated recordNum is allowed)
if len(self._patternNZHistory) > 0:
if recordNum < self._patternNZHistory[-1][0]:
raise ValueError("the record number has to increase monotonically")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add to the docstring:

:raises: (ValueError) when record number does not increase monotonically.


# Store pattern in our history if this is a new record
if len(self._patternNZHistory) == 0 or \
recordNum > self._patternNZHistory[-1][0]:
self._patternNZHistory.append((recordNum, patternNZ))

# To allow multi-class classification, we need to be able to run learning
# without inference being on. So initialize retval outside
Expand All @@ -222,48 +229,63 @@ def compute(self, recordNum, patternNZ, classification, learn, infer):
self._maxBucketIdx+1))), axis=0)
self._maxInputIdx = int(newMaxInputIdx)

# Get classification info
if classification is not None:
if type(classification["bucketIdx"]) is not list:
bucketIdxList = [classification["bucketIdx"]]
actValueList = [classification["actValue"]]
numCategory = 1
else:
bucketIdxList = classification["bucketIdx"]
actValueList = classification["actValue"]
numCategory = len(classification["bucketIdx"])
else:
if learn:
raise ValueError("classification cannot be None when learn=True")
actValueList = None
bucketIdxList = None
# ------------------------------------------------------------------------
# Inference:
# For each active bit in the activationPattern, get the classification
# votes
if infer:
retval = self.infer(patternNZ, classification)
retval = self.infer(patternNZ, actValueList)


if learn and classification["bucketIdx"] is not None:
# Get classification info
bucketIdx = classification["bucketIdx"]
actValue = classification["actValue"]

# Update maxBucketIndex and augment weight matrix with zero padding
if bucketIdx > self._maxBucketIdx:
for nSteps in self.steps:
self._weightMatrix[nSteps] = numpy.concatenate((
self._weightMatrix[nSteps],
numpy.zeros(shape=(self._maxInputIdx+1,
bucketIdx-self._maxBucketIdx))), axis=1)

self._maxBucketIdx = int(bucketIdx)

# Update rolling average of actual values if it's a scalar. If it's
# not, it must be a category, in which case each bucket only ever
# sees one category so we don't need a running average.
while self._maxBucketIdx > len(self._actualValues) - 1:
self._actualValues.append(None)
if self._actualValues[bucketIdx] is None:
self._actualValues[bucketIdx] = actValue
else:
if (isinstance(actValue, int) or
isinstance(actValue, float) or
isinstance(actValue, long)):
self._actualValues[bucketIdx] = ((1.0 - self.actValueAlpha)
* self._actualValues[bucketIdx]
+ self.actValueAlpha * actValue)
else:
for categoryI in range(numCategory):
bucketIdx = bucketIdxList[categoryI]
actValue = actValueList[categoryI]

# Update maxBucketIndex and augment weight matrix with zero padding
if bucketIdx > self._maxBucketIdx:
for nSteps in self.steps:
self._weightMatrix[nSteps] = numpy.concatenate((
self._weightMatrix[nSteps],
numpy.zeros(shape=(self._maxInputIdx+1,
bucketIdx-self._maxBucketIdx))), axis=1)

self._maxBucketIdx = int(bucketIdx)

# Update rolling average of actual values if it's a scalar. If it's
# not, it must be a category, in which case each bucket only ever
# sees one category so we don't need a running average.
while self._maxBucketIdx > len(self._actualValues) - 1:
self._actualValues.append(None)
if self._actualValues[bucketIdx] is None:
self._actualValues[bucketIdx] = actValue
else:
if (isinstance(actValue, int) or
isinstance(actValue, float) or
isinstance(actValue, long)):
self._actualValues[bucketIdx] = ((1.0 - self.actValueAlpha)
* self._actualValues[bucketIdx]
+ self.actValueAlpha * actValue)
else:
self._actualValues[bucketIdx] = actValue

for (learnRecordNum, learnPatternNZ) in self._patternNZHistory:
error = self._calculateError(recordNum, classification)
error = self._calculateError(recordNum, bucketIdxList)

nSteps = recordNum - learnRecordNum
if nSteps in self.steps:
Expand All @@ -289,7 +311,7 @@ def compute(self, recordNum, patternNZ, classification, learn, infer):



def infer(self, patternNZ, classification):
def infer(self, patternNZ, actValueList):
"""
Return the inference value from one input sample. The actual
learning happens in compute().
Expand Down Expand Up @@ -319,10 +341,10 @@ def infer(self, patternNZ, classification):

# NOTE: If doing 0-step prediction, we shouldn't use any knowledge
# of the classification input during inference.
if self.steps[0] == 0 or classification is None:
if self.steps[0] == 0 or actValueList is None:
defaultValue = 0
else:
defaultValue = classification["actValue"]
defaultValue = actValueList[0]
actValues = [x if x is not None else defaultValue
for x in self._actualValues]
retval = {"actualValues": actValues}
Expand Down Expand Up @@ -436,19 +458,20 @@ def write(self, proto):
proto.verbosity = self.verbosity


def _calculateError(self, recordNum, classification):
def _calculateError(self, recordNum, bucketIdxList):
"""
Calculate error signal

:param classification: dict of the classification information:
bucketIdx: index of the encoder bucket
actValue: actual value going into the encoder
:param bucketIdxList: list of encoder buckets

:return: dict containing error. The key is the number of steps
The value is a numpy array of error at the output layer
"""
error = dict()
targetDist = numpy.zeros(self._maxBucketIdx + 1)
targetDist[classification["bucketIdx"]] = 1.0
numCategories = len(bucketIdxList)
for bucketIdx in bucketIdxList:
targetDist[bucketIdx] = 1.0/numCategories

for (learnRecordNum, learnPatternNZ) in self._patternNZHistory:
nSteps = recordNum - learnRecordNum
Expand Down
75 changes: 32 additions & 43 deletions src/nupic/regions/sdr_classifier_region.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,59 +364,48 @@ def compute(self, inputs, outputs):
# when network.run() is called
self._computeFlag = True

# An input can potentially belong to multiple categories.
# If a category value is < 0, it means that the input does not belong to
# that category.
categories = [category for category in inputs["categoryIn"]
if category >= 0]

patternNZ = inputs["bottomUpIn"].nonzero()[0]

# ==========================================================================
# Allow to train on multiple input categories.
# Do inference first, and then train on all input categories.

# --------------------------------------------------------------------------
# 1. Call classifier. Don't train. Just inference. Train after.

# Use Dummy classification input, because this param is required even for
# inference mode. Because learning is off, the classifier is not learning
# this dummy input. Inference only here.
classificationIn = {"actValue": 0, "bucketIdx": 0}
clResults = self._sdrClassifier.compute(recordNum=self.recordNum,
patternNZ=patternNZ,
classification=classificationIn,
learn=False,
infer=self.inferenceMode)

# ------------------------------------------------------------------------
# 2. Train classifier, no inference
if self.learningMode:
for category in categories:
classificationIn = {"bucketIdx": int(category),
"actValue": int(category)}

self._sdrClassifier.compute(recordNum=self.recordNum,
patternNZ=patternNZ,
classification=classificationIn,
learn=self.learningMode,
infer=False)

# If the input does not belong to a category, i.e. len(categories) == 0,
# then look for bucketIdx and actValueIn.
if len(categories) == 0:
# An input can potentially belong to multiple categories.
# If a category value is < 0, it means that the input does not belong to
# that category.
categories = [category for category in inputs["categoryIn"]
if category >= 0]

if len(categories) > 0:
# Allow to train on multiple input categories.
bucketIdxList = []
actValueList = []
for category in categories:
bucketIdxList.append(int(category))
actValueList.append(int(category))

classificationIn = {"bucketIdx": bucketIdxList,
"actValue": actValueList}
else:
# If the input does not belong to a category, i.e. len(categories) == 0,
# then look for bucketIdx and actValueIn.
if "bucketIdxIn" not in inputs:
raise KeyError("Network link missing: bucketIdxOut -> bucketIdxIn")
if "actValueIn" not in inputs:
raise KeyError("Network link missing: actValueOut -> actValueIn")

classificationIn = {"bucketIdx": int(inputs["bucketIdxIn"]),
"actValue": float(inputs["actValueIn"])}
self._sdrClassifier.compute(recordNum=self.recordNum,
patternNZ=patternNZ,
classification=classificationIn,
learn=self.learningMode,
infer=False)
else:
# Use a dummy classification input, because this parameter is required even
# in inference mode. Because learning is off, the classifier does not learn
# from this dummy input; only inference is performed here.
classificationIn = {"actValue": 0, "bucketIdx": 0}

# Perform inference if self.inferenceMode is True
# Train classifier if self.learningMode is True
clResults = self._sdrClassifier.compute(recordNum=self.recordNum,
patternNZ=patternNZ,
classification=classificationIn,
learn=self.learningMode,
infer=self.inferenceMode)

# fill outputs with clResults
if clResults is not None and len(clResults) > 0:
Expand Down
100 changes: 97 additions & 3 deletions tests/integration/nupic/regions/single_step_sdr_classifier_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,18 @@
# http://numenta.org/licenses/
# ----------------------------------------------------------------------

from operator import itemgetter
import os
import tempfile
import unittest

import numpy as np

from datetime import datetime
from nupic.data.file_record_stream import FileRecordStream
from nupic.encoders import MultiEncoder, ScalarEncoder
from nupic.engine import Network

from nupic.frameworks.opf.model_factory import ModelFactory


def _getTempFileName():
Expand Down Expand Up @@ -128,7 +131,7 @@ def testSimpleMulticlassNetworkPY(self):
dataSource.close()
os.remove(filename)


@unittest.skip("Skip test until we updated SDR classifier in nupic.core")
def testSimpleMulticlassNetworkCPP(self):
# Setup data record stream of fake data (with three categories)
filename = _getTempFileName()
Expand Down Expand Up @@ -204,14 +207,105 @@ def testSimpleMulticlassNetworkCPP(self):
net.run(1)
inferredCats = classifier.getOutputData("categoriesOut")
self.assertSequenceEqual(expectedCats[i], inferredCats.tolist(),
"Classififer did not infer expected category "
"Classifier did not infer expected category "
"for record number {}.".format(i))

# Close data stream, delete file.
dataSource.close()
os.remove(filename)


def testHelloWorldPrediction(self):
# Train an HTMPrediction model (created through ModelFactory) on the
# character sequence 'hello world.' and check that, in the late training
# passes, the most likely 1-step prediction equals the character that is
# actually presented next.
#
# NOTE(review): leading indentation is missing in this view of the file, so
# the nesting of the statements in the training loop below cannot be read
# off the text -- confirm against the repository copy.
text = 'hello world.'
# One category per character that may appear in the input.
categories = list("abcdefghijklmnopqrstuvwxyz 1234567890.")
colsPerChar = 11
# One more group of columns than there are categories; presumably the extra
# group covers the encoder's "unknown" bucket -- TODO confirm.
numColumns = (len(categories) + 1) * colsPerChar

MODEL_PARAMS = {
"model": "HTMPrediction",
"version": 1,
"predictAheadTime": None,
"modelParams": {
"inferenceType": "TemporalMultiStep",
"sensorParams": {
"verbosity": 0,
"encoders": {
"token": {
"fieldname": u"token",
"name": u"token",
"type": "CategoryEncoder",
"categoryList": categories,
"w": colsPerChar,
"forced": True,
}
},
"sensorAutoReset": None,
},
# spEnable is False, so the spParams below are presumably not used by the
# model -- TODO confirm.
"spEnable": False,
"spParams": {
"spVerbosity": 0,
"globalInhibition": 1,
"columnCount": 2048,
"inputWidth": 0,
"numActiveColumnsPerInhArea": 40,
"seed": 1956,
"columnDimensions": 0.5,
"synPermConnected": 0.1,
"synPermActiveInc": 0.1,
"synPermInactiveDec": 0.01,
"boostStrength": 0.0,
},

"tmEnable": True,
"tmParams": {
"verbosity": 0,
# columnCount and inputWidth are both set to numColumns computed above.
"columnCount": numColumns,
"cellsPerColumn": 16,
"inputWidth": numColumns,
"seed": 1960,
"temporalImp": "tm_cpp",
"newSynapseCount": 6,
"maxSynapsesPerSegment": 11,
"maxSegmentsPerCell": 32,
"initialPerm": 0.21,
"permanenceInc": 0.1,
"permanenceDec": 0.05,
"globalDecay": 0.0,
"maxAge": 0,
"minThreshold": 3,
"activationThreshold": 5,
"outputType": "normal",
},
# Python implementation of the SDR classifier region, configured with
# steps "1".
"clParams": {
"implementation": "py",
"regionName": "SDRClassifierRegion",
"verbosity": 0,
"alpha": 0.1,
"steps": "1",
},
"trainSPNetOnlyIfRequested": False,
},
}

model = ModelFactory.create(MODEL_PARAMS)
model.enableInference({"predictedField": "token"})
model.enableLearning()

# train
# 20 passes over the text; the prediction made on the previous character is
# only asserted when rpt > 15 (the last four passes).
prediction = None
for rpt in xrange(20):
for token in text:
if prediction is not None:
if rpt > 15:
self.assertEqual(prediction, token)
modelInput = {"token": token}
result = model.run(modelInput)
# Sort the entries stored under key 1 of "multiStepPredictions" by value,
# largest first, and keep the key of the first entry (presumably key 1 is
# the 1-step horizon matching clParams "steps" -- TODO confirm).
prediction = sorted(result.inferences["multiStepPredictions"][1].items(),
key=itemgetter(1), reverse=True)[0][0]
# NOTE(review): presumably executed once per pass over the text, so that no
# prediction is carried from the final '.' to the next pass's first
# character -- confirm the nesting.
model.resetSequenceStates()
prediction = None



if __name__ == "__main__":
unittest.main()
Loading