
Merge pull request numenta#1902 from rhyolight/standard-example-data3
Replaces datasethelpers with pkg_resources.
rhyolight committed Mar 5, 2015
2 parents a4542d8 + 0c7901d commit b027031
Showing 145 changed files with 151 additions and 412 deletions.
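
The same pattern repeats across all of these files: the bundled example data now lives inside the nupic.datafiles package, and callers resolve it with pkg_resources instead of the deleted findDataset helper. A minimal before/after sketch of the pattern (the path is the one used throughout the diffs below):

# Before: findDataset searched NTA_DATA_PATH and other relative locations.
from nupic.data.datasethelpers import findDataset
path = findDataset("extra/hotgym/rec-center-hourly.csv")

# After: resource_filename resolves the name inside the installed
# nupic.datafiles package and returns an absolute filesystem path
# (extracting from a zipped egg first if necessary).
from pkg_resources import resource_filename
path = resource_filename("nupic.datafiles",
                         "extra/hotgym/rec-center-hourly.csv")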
2 changes: 2 additions & 0 deletions MANIFEST.in
@@ -10,3 +10,5 @@ recursive-include extensions *.hpp *.cpp
 recursive-include external/common/share/swig *.*
 recursive-include external/darwin64 swig *.a
 recursive-include external/linux64 swig *.a
+
+recursive-include nupic/datafiles *.csv *.txt
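
MANIFEST.in only controls what goes into the source distribution; for resource_filename to work from an installed copy, the CSV files also have to be installed with the package, typically via package_data or include_package_data in setup.py. A sketch of that side (nupic's actual setup.py is not part of this diff, so treat the details as illustrative):

from setuptools import setup, find_packages

setup(
  name="nupic",
  packages=find_packages(),
  # Install the bundled datasets inside the nupic.datafiles package so
  # pkg_resources.resource_filename("nupic.datafiles", ...) can find them.
  package_data={
    "nupic.datafiles": ["extra/hotgym/*.csv", "extra/hotgym/*.txt"],
  },
)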
10 changes: 6 additions & 4 deletions examples/network/hierarchy_network_demo.py
@@ -33,8 +33,9 @@
 import os
 import math
 
+from pkg_resources import resource_filename
+
 from nupic.algorithms.anomaly import computeRawAnomalyScore
-from nupic.data.datasethelpers import findDataset
 from nupic.data.file_record_stream import FileRecordStream
 from nupic.engine import Network
 from nupic.encoders import MultiEncoder
@@ -45,7 +46,9 @@
 
 # Seed used for random number generation
 _SEED = 2045
-_INPUT_FILE_PATH = "../prediction/data/extra/hotgym/rec-center-hourly.csv"
+_INPUT_FILE_PATH = resource_filename(
+  "nupic.datafiles", "extra/hotgym/rec-center-hourly.csv"
+)
 _OUTPUT_FILE_NAME = "hierarchy-demo-output.csv"
 
 # Parameter dict for SPRegion
@@ -347,8 +350,7 @@ def runNetwork(network, numRecords, writer):
 
 
 def runDemo():
-  trainFile = findDataset(_INPUT_FILE_PATH)
-  dataSource = FileRecordStream(streamID=trainFile)
+  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
   numRecords = dataSource.getDataRowCount()
   print "Creating network"
   network = createNetwork(dataSource)
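Several demos now hand the resolved path straight to FileRecordStream. A quick usage sketch (getDataRowCount and the streamID argument appear in this diff; getNextRecord is assumed from the record-stream interface):

from pkg_resources import resource_filename
from nupic.data.file_record_stream import FileRecordStream

path = resource_filename("nupic.datafiles",
                         "extra/hotgym/rec-center-hourly.csv")
dataSource = FileRecordStream(streamID=path)
print dataSource.getDataRowCount()  # number of data rows in the CSV
print dataSource.getNextRecord()    # first record, e.g. [datetime(...), 21.2]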
10 changes: 6 additions & 4 deletions examples/network/network_api_demo.py
@@ -25,15 +25,18 @@
 import json
 import os
 
+from pkg_resources import resource_filename
+
 from nupic.algorithms.anomaly import computeRawAnomalyScore
-from nupic.data.datasethelpers import findDataset
 from nupic.data.file_record_stream import FileRecordStream
 from nupic.engine import Network
 from nupic.encoders import MultiEncoder
 
 _VERBOSITY = 0 # how chatty the demo should be
 _SEED = 1956 # the random seed used throughout
-_DATA_PATH = "extra/hotgym/rec-center-hourly.csv"
+_INPUT_FILE_PATH = resource_filename(
+  "nupic.datafiles", "extra/hotgym/rec-center-hourly.csv"
+)
 _OUTPUT_PATH = "network-demo-output.csv"
 _NUM_RECORDS = 2000
 
@@ -213,8 +216,7 @@ def runNetwork(network, writer):
 
 
 if __name__ == "__main__":
-  trainFile = findDataset(_DATA_PATH)
-  dataSource = FileRecordStream(streamID=trainFile)
+  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
 
   network = createNetwork(dataSource)
   outputPath = os.path.join(os.path.dirname(__file__), _OUTPUT_PATH)
13 changes: 7 additions & 6 deletions examples/opf/clients/hotgym/anomaly/hotgym_anomaly.py
@@ -21,7 +21,7 @@
 # ----------------------------------------------------------------------
 
 """
-A simple client to create a CLA anomaly detection model for hotgym.
+A simple client to create a HTM anomaly detection model for hotgym.
 The script prints out all records that have an abnormally high anomaly
 score.
 """
@@ -30,16 +30,17 @@
 import datetime
 import logging
 
-from nupic.data.datasethelpers import findDataset
+from pkg_resources import resource_filename
 
 from nupic.frameworks.opf.modelfactory import ModelFactory
 from nupic.frameworks.opf.predictionmetricsmanager import MetricsManager
 
 import model_params
 
 _LOGGER = logging.getLogger(__name__)
 
-_DATA_PATH = "extra/hotgym/rec-center-hourly.csv"
-
+_INPUT_DATA_FILE = resource_filename(
+  "nupic.datafiles", "extra/hotgym/rec-center-hourly.csv"
+)
 _OUTPUT_PATH = "anomaly_scores.csv"
 
 _ANOMALY_THRESHOLD = 0.9
@@ -52,7 +53,7 @@ def createModel():
 def runHotgymAnomaly():
   model = createModel()
   model.enableInference({'predictedField': 'consumption'})
-  with open (findDataset(_DATA_PATH)) as fin:
+  with open (_INPUT_DATA_FILE) as fin:
     reader = csv.reader(fin)
     csvWriter = csv.writer(open(_OUTPUT_PATH,"wb"))
     csvWriter.writerow(["timestamp", "consumption", "anomaly_score"])
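The rest of runHotgymAnomaly is collapsed in this view. From the docstring and the _ANOMALY_THRESHOLD constant above, the loop plausibly looks like the following sketch; the three-row header skip, field names, and timestamp format are assumptions based on the hotgym dataset, not lines from this commit:

    # Skip the three header rows (field names, types, special flags).
    reader.next()
    reader.next()
    reader.next()
    for row in reader:
      timestamp = datetime.datetime.strptime(row[0], "%m/%d/%y %H:%M")
      consumption = float(row[1])
      result = model.run({"timestamp": timestamp,
                          "consumption": consumption})
      anomalyScore = result.inferences["anomalyScore"]
      csvWriter.writerow([timestamp, consumption, anomalyScore])
      if anomalyScore > _ANOMALY_THRESHOLD:
        _LOGGER.info("Anomaly detected at %s. Anomaly score: %f.",
                     timestamp, anomalyScore)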
9 changes: 6 additions & 3 deletions examples/opf/clients/hotgym/simple/hotgym.py
@@ -26,7 +26,8 @@
 import datetime
 import logging
 
-from nupic.data.datasethelpers import findDataset
+from pkg_resources import resource_filename
+
 from nupic.frameworks.opf.metrics import MetricSpec
 from nupic.frameworks.opf.modelfactory import ModelFactory
 from nupic.frameworks.opf.predictionmetricsmanager import MetricsManager
@@ -35,7 +36,9 @@
 
 _LOGGER = logging.getLogger(__name__)
 
-_DATA_PATH = "extra/hotgym/rec-center-hourly.csv"
+_INPUT_FILE_PATH = resource_filename(
+  "nupic.datafiles", "extra/hotgym/rec-center-hourly.csv"
+)
 
 _METRIC_SPECS = (
     MetricSpec(field='consumption', metric='multiStep',
@@ -66,7 +69,7 @@ def runHotgym():
   model.enableInference({'predictedField': 'consumption'})
   metricsManager = MetricsManager(_METRIC_SPECS, model.getFieldInfo(),
                                   model.getInferenceType())
-  with open (findDataset(_DATA_PATH)) as fin:
+  with open (_INPUT_FILE_PATH) as fin:
     reader = csv.reader(fin)
     headers = reader.next()
     reader.next()
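The extra reader.next() calls reflect nupic's record-stream file format: data CSVs carry three header rows (field names, field types, and special flags such as T marking the timestamp field), so after capturing the names the remaining header rows are skipped. Illustrative first lines of rec-center-hourly.csv (values approximate):

timestamp,consumption
datetime,float
T,
7/2/10 0:00,21.2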
10 changes: 5 additions & 5 deletions nupic/data/aggregator.py
@@ -25,9 +25,10 @@
 import datetime
 from collections import defaultdict
 
+from pkg_resources import resource_filename
+
 from nupic.data import SENTINEL_VALUE_FOR_MISSING_DATA
 from nupic.data.file_record_stream import FileRecordStream
-from nupic.data.datasethelpers import findDataset
 
 
 """The aggregator aggregates PF datasets
@@ -755,8 +756,7 @@ def generateDataset(aggregationInfo, inputFilename, outputFilename=None):
                  have values of 0, then aggregation will be suppressed, and the given
                  inputFile parameter value will be returned.
-  inputFilename: filename (or relative path form NTA_DATA_PATH) of
-                 the input dataset
+  inputFilename: filename of the input dataset within examples/prediction/data
   outputFilename: name for the output file. If not given, a name will be
                  generated based on the input filename and the aggregation params
@@ -781,7 +781,7 @@
 
 
   # Create the input stream
-  inputFullPath = findDataset(inputFilename)
+  inputFullPath = resource_filename("nupic.datafiles", inputFilename)
   inputObj = FileRecordStream(inputFullPath)
 
 
@@ -864,7 +864,7 @@ def getFilename(aggregationInfo, inputFile):
   """
 
   # Find the actual file, with an absolute path
-  inputFile = findDataset(inputFile)
+  inputFile = resource_filename("nupic.datafiles", inputFile)
 
   a = defaultdict(lambda: 0, aggregationInfo)
   outputDir = os.path.dirname(inputFile)
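One behavioral difference is worth flagging: the removed docstring line above shows that findDataset also accepted paths relative to NTA_DATA_PATH, whereas resource_filename only resolves names inside the nupic.datafiles package. Callers that relied on the environment-variable search could approximate the old behavior with a small shim; a sketch under those assumptions, not something this commit provides:

import os
from pkg_resources import resource_filename

def locateDataset(relativePath):
  # Prefer the copy bundled with the installed nupic package.
  bundled = resource_filename("nupic.datafiles", relativePath)
  if os.path.isfile(bundled):
    return bundled
  # Fall back to NTA_DATA_PATH, mimicking the deleted findDataset helper.
  for base in os.environ.get("NTA_DATA_PATH", "").split(os.pathsep):
    candidate = os.path.join(base, relativePath)
    if base and os.path.isfile(candidate):
      return candidate
  raise IOError("Unable to locate dataset: %s" % relativePath)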
184 changes: 0 additions & 184 deletions nupic/data/datasethelpers.py

This file was deleted.

5 changes: 3 additions & 2 deletions nupic/data/stats.py
@@ -22,7 +22,8 @@
 import os
 import pickle
 
-from nupic.data.datasethelpers import findDataset
+from pkg_resources import resource_filename
+
 from nupic.regions.RecordSensor import RecordSensor
 from nupic.data.file_record_stream import FileRecordStream
 
@@ -128,7 +129,7 @@ def generateStats(filename, statsInfo, maxSamples = None, filters=[], cache=True
     raise RuntimeError("statsInfo must be a dict -- "
                        "found '%s' instead" % type(statsInfo))
 
-  filename = findDataset(filename)
+  filename = resource_filename("nupic.datafiles", filename)
 
   if cache:
     statsFilename = getStatsFilename(filename, statsInfo, filters)