Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replaces datasethelpers with pkg_resources. #1902

Merged
merged 7 commits into from
Mar 5, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ recursive-include extensions *.hpp *.cpp
recursive-include external/common/share/swig *.*
recursive-include external/darwin64 swig *.a
recursive-include external/linux64 swig *.a

recursive-include nupic/datafiles *.csv *.txt
10 changes: 6 additions & 4 deletions examples/network/hierarchy_network_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@
import os
import math

from pkg_resources import resource_filename
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: put blank line before pkg_resources, it is a third-party lib

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same elsewhere


from nupic.algorithms.anomaly import computeRawAnomalyScore
from nupic.data.datasethelpers import findDataset
from nupic.data.file_record_stream import FileRecordStream
from nupic.engine import Network
from nupic.encoders import MultiEncoder
Expand All @@ -45,7 +46,9 @@

# Seed used for random number generation
_SEED = 2045
_INPUT_FILE_PATH = "../prediction/data/extra/hotgym/rec-center-hourly.csv"
_INPUT_FILE_PATH = resource_filename(
"nupic.datafiles", "extra/hotgym/rec-center-hourly.csv"
)
_OUTPUT_FILE_NAME = "hierarchy-demo-output.csv"

# Parameter dict for SPRegion
Expand Down Expand Up @@ -347,8 +350,7 @@ def runNetwork(network, numRecords, writer):


def runDemo():
trainFile = findDataset(_INPUT_FILE_PATH)
dataSource = FileRecordStream(streamID=trainFile)
dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
numRecords = dataSource.getDataRowCount()
print "Creating network"
network = createNetwork(dataSource)
Expand Down
10 changes: 6 additions & 4 deletions examples/network/network_api_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,18 @@
import json
import os

from pkg_resources import resource_filename

from nupic.algorithms.anomaly import computeRawAnomalyScore
from nupic.data.datasethelpers import findDataset
from nupic.data.file_record_stream import FileRecordStream
from nupic.engine import Network
from nupic.encoders import MultiEncoder

_VERBOSITY = 0 # how chatty the demo should be
_SEED = 1956 # the random seed used throughout
_DATA_PATH = "extra/hotgym/rec-center-hourly.csv"
_INPUT_FILE_PATH = resource_filename(
"nupic.datafiles", "extra/hotgym/rec-center-hourly.csv"
)
_OUTPUT_PATH = "network-demo-output.csv"
_NUM_RECORDS = 2000

Expand Down Expand Up @@ -213,8 +216,7 @@ def runNetwork(network, writer):


if __name__ == "__main__":
trainFile = findDataset(_DATA_PATH)
dataSource = FileRecordStream(streamID=trainFile)
dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)

network = createNetwork(dataSource)
outputPath = os.path.join(os.path.dirname(__file__), _OUTPUT_PATH)
Expand Down
13 changes: 7 additions & 6 deletions examples/opf/clients/hotgym/anomaly/hotgym_anomaly.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# ----------------------------------------------------------------------

"""
A simple client to create a CLA anomaly detection model for hotgym.
A simple client to create a HTM anomaly detection model for hotgym.
The script prints out all records that have an abnormally high anomaly
score.
"""
Expand All @@ -30,16 +30,17 @@
import datetime
import logging

from nupic.data.datasethelpers import findDataset
from pkg_resources import resource_filename

from nupic.frameworks.opf.modelfactory import ModelFactory
from nupic.frameworks.opf.predictionmetricsmanager import MetricsManager

import model_params

_LOGGER = logging.getLogger(__name__)

_DATA_PATH = "extra/hotgym/rec-center-hourly.csv"

_INPUT_DATA_FILE = resource_filename(
"nupic.datafiles", "extra/hotgym/rec-center-hourly.csv"
)
_OUTPUT_PATH = "anomaly_scores.csv"

_ANOMALY_THRESHOLD = 0.9
Expand All @@ -52,7 +53,7 @@ def createModel():
def runHotgymAnomaly():
model = createModel()
model.enableInference({'predictedField': 'consumption'})
with open (findDataset(_DATA_PATH)) as fin:
with open (_INPUT_DATA_FILE) as fin:
reader = csv.reader(fin)
csvWriter = csv.writer(open(_OUTPUT_PATH,"wb"))
csvWriter.writerow(["timestamp", "consumption", "anomaly_score"])
Expand Down
9 changes: 6 additions & 3 deletions examples/opf/clients/hotgym/simple/hotgym.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
import datetime
import logging

from nupic.data.datasethelpers import findDataset
from pkg_resources import resource_filename

from nupic.frameworks.opf.metrics import MetricSpec
from nupic.frameworks.opf.modelfactory import ModelFactory
from nupic.frameworks.opf.predictionmetricsmanager import MetricsManager
Expand All @@ -35,7 +36,9 @@

_LOGGER = logging.getLogger(__name__)

_DATA_PATH = "extra/hotgym/rec-center-hourly.csv"
_INPUT_FILE_PATH = resource_filename(
"nupic.datafiles", "extra/hotgym/rec-center-hourly.csv"
)

_METRIC_SPECS = (
MetricSpec(field='consumption', metric='multiStep',
Expand Down Expand Up @@ -66,7 +69,7 @@ def runHotgym():
model.enableInference({'predictedField': 'consumption'})
metricsManager = MetricsManager(_METRIC_SPECS, model.getFieldInfo(),
model.getInferenceType())
with open (findDataset(_DATA_PATH)) as fin:
with open (_INPUT_FILE_PATH) as fin:
reader = csv.reader(fin)
headers = reader.next()
reader.next()
Expand Down
10 changes: 5 additions & 5 deletions nupic/data/aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@
import datetime
from collections import defaultdict

from pkg_resources import resource_filename

from nupic.data import SENTINEL_VALUE_FOR_MISSING_DATA
from nupic.data.file_record_stream import FileRecordStream
from nupic.data.datasethelpers import findDataset


"""The aggregator aggregates PF datasets
Expand Down Expand Up @@ -755,8 +756,7 @@ def generateDataset(aggregationInfo, inputFilename, outputFilename=None):
have values of 0, then aggregation will be suppressed, and the given
inputFile parameter value will be returned.

inputFilename: filename (or relative path from NTA_DATA_PATH) of
the input dataset
inputFilename: filename of the input dataset within examples/prediction/data

outputFilename: name for the output file. If not given, a name will be
generated based on the input filename and the aggregation params
Expand All @@ -781,7 +781,7 @@ def generateDataset(aggregationInfo, inputFilename, outputFilename=None):


# Create the input stream
inputFullPath = findDataset(inputFilename)
inputFullPath = resource_filename("nupic.datafiles", inputFilename)
inputObj = FileRecordStream(inputFullPath)


Expand Down Expand Up @@ -864,7 +864,7 @@ def getFilename(aggregationInfo, inputFile):
"""

# Find the actual file, with an absolute path
inputFile = findDataset(inputFile)
inputFile = resource_filename("nupic.datafiles", inputFile)

a = defaultdict(lambda: 0, aggregationInfo)
outputDir = os.path.dirname(inputFile)
Expand Down
184 changes: 0 additions & 184 deletions nupic/data/datasethelpers.py

This file was deleted.

5 changes: 3 additions & 2 deletions nupic/data/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
import os
import pickle

from nupic.data.datasethelpers import findDataset
from pkg_resources import resource_filename

from nupic.regions.RecordSensor import RecordSensor
from nupic.data.file_record_stream import FileRecordStream

Expand Down Expand Up @@ -128,7 +129,7 @@ def generateStats(filename, statsInfo, maxSamples = None, filters=[], cache=True
raise RuntimeError("statsInfo must be a dict -- "
"found '%s' instead" % type(statsInfo))

filename = findDataset(filename)
filename = resource_filename("nupic.datafiles", filename)

if cache:
statsFilename = getStatsFilename(filename, statsInfo, filters)
Expand Down
Loading