Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Problem initializer #101

Merged
merged 11 commits into from
Nov 28, 2018
13 changes: 13 additions & 0 deletions configs/problem_inits/COG_dataset_generator.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Problem parameters:
# ProblemInitializer (miprometheus/helpers/problem_initializer.py) loads this file, keeps the
# `training` section and hands its `problem` entry to ProblemFactory.build().
# NOTE(review): every key below appears at column 0 in this view, so the intended nesting
# (presumably training -> problem -> generation) cannot be confirmed here — verify against
# the original file before relying on it.
training:
problem:
name: COG
# Folder the problem uses for its data; ProblemInitializer's `path` argument overrides it.
data_folder: '~/data/cog'
set: train
dataset_type: generated
# NOTE(review): the keys below look like dataset-generation settings — confirm their exact
# meaning against the COG problem implementation.
generation:
examples_per_task: 256
sequence_length: 5
memory_length: 4
max_distractors: 2
nr_processors: 6
5 changes: 2 additions & 3 deletions miprometheus/helpers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Helpers.
from .index_splitter import IndexSplitter
from .problem_initializer import ProblemInitializer



__all__ = ['IndexSplitter']
__all__ = ['IndexSplitter','ProblemInitializer']
134 changes: 134 additions & 0 deletions miprometheus/helpers/problem_initializer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) IBM Corporation 2018
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
problem_initializer.py:

- Contains the definition of a new ``Helper`` class, called ProblemInitializer.
- ProblemInitializer runs __init__ for a given problem, downloading and/or generating its datasets as necessary.
- Additionally, contains helpful function(s) to aid in checking and downloading data for problems.

"""

__author__ = "Emre Sevgen"

import os
import argparse
#import json

from miprometheus.problems.problem_factory import ProblemFactory
from miprometheus.workers import Worker

class ProblemInitializer(Worker):
    """
    Helper worker that builds a single problem so that the problem's own ``__init__`` runs.

    The problem is described either by a configuration file or by a bare problem name.
    In both cases the ``initialization_only`` flag is set in the problem parameters before
    the problem is built through ``ProblemFactory``.
    """

    def __init__(self, config=None, name=None, path=None):
        """
        Initialize ProblemInitializer, which runs the __init__ for a provided problem, optionally overriding some parameters.

        Exits the process with status 1 (by raising ``SystemExit``) when the config file cannot be
        found or when building the problem raises ``AttributeError``.

        :param config: Path to a config file to initialize from. When ``None``, ``name`` is used instead.
        :type config: string
        :param name: Name of a problem to initialize using default parameters (only used when ``config`` is ``None``).
        :type name: string
        :param path: Path to initialize problem, overrides default data_folder if provided.
        :type path: string

        """

        # Call base constructor to set up app state, registry and add default params.
        super(ProblemInitializer, self).__init__(name='ProblemInitializer', add_default_parser_args=False)

        # If no config is provided, try to build a problem from command line name.
        if config is None:
            self.params.add_default_params({'problem': {'name': name}})

        # If config is provided, parse and build problem from it.
        else:
            try:
                #self.params.add_config_params_from_yaml(os.path.expanduser(config))
                configs_to_load = self.recurrent_config_parse(os.path.expanduser(config),[])
                self.recurrent_config_load(configs_to_load)
                # Only the 'training' section of the loaded configuration is kept.
                self.params = self.params['training']
            except FileNotFoundError:
                print("Config file at path '{}' not found.".format(config))
                # SystemExit is raised directly: the exit() helper is injected by the
                # `site` module and is not guaranteed to exist (e.g. `python -S`).
                raise SystemExit(1)

        # If path is provided, override default path.
        if path is not None:
            self.params.add_config_params({'problem': {'data_folder': path}})

        # Pass initialization only flag.
        self.params.add_config_params({'problem':{'initialization_only': True}})

        # Build Problem. The instance itself is discarded; only the constructor's work matters here.
        try:
            _ = ProblemFactory.build(self.params['problem'])
        except AttributeError:
            # NOTE(review): any AttributeError raised while the problem is being constructed
            # is also reported as an unknown problem name — confirm this is intended.
            print("Provided problem name not found.")
            raise SystemExit(1)

# Useful function to properly parse argparse
def int_or_str(val):
    """
    Convert ``val`` to an integer when possible, otherwise hand it back unchanged.

    Intended as an argparse ``type`` callable.

    :param val: Value to convert.
    :return: ``int(val)`` if that conversion succeeds, else ``val`` itself.
    """
    try:
        converted = int(val)
    except ValueError:
        # Not an integer literal: keep the original value.
        converted = val
    return converted

# Useful function to properly parse argparse
def str_to_bool(val):
    """
    Interpret a command-line string as a boolean (case-insensitive).

    Intended as an argparse ``type`` callable.

    :param val: Text such as 'yes', 'true', 't', 'y', '1' or 'no', 'false', 'f', 'n', '0'.
    :type val: string
    :return: True or False according to the recognised spelling.
    :raises argparse.ArgumentTypeError: if the text is not one of the recognised spellings.
    """
    truthy = ('yes', 'true', 't', 'y', '1')
    falsy = ('no', 'false', 'f', 'n', '0')

    token = val.lower()
    if token not in truthy and token not in falsy:
        raise argparse.ArgumentTypeError('Boolean value expected.')
    return token in truthy

if __name__ == "__main__":
    # May be used as a script to prepare a dataset for subsequent runs.

    # Create parser with a list of runtime arguments.
    # RawTextHelpFormatter prints the description verbatim, so the pieces below must carry
    # their own separating spaces (the original lacked one before 'can be provided').
    parser = argparse.ArgumentParser(description=
        'Initializes any problem, thereby downloading any prerequisite datasets if required. \n' +
        'A dataset can be initialize either from a config file with --c, or from the command line directly ' +
        'with --problem to initialize a problem with its default parameters. An optional --path argument ' +
        'can be provided to override default problem path.'
        , formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('--c', type=str,
        help='A config file to initialize from.')

    parser.add_argument('--problem', type=str,
        help='Initialize a problem with its default parameters directly from the command line')

    parser.add_argument('--path', type=str,
        help='Change from problem default path to this path when initializing')

    # Add a command line dict parser
    #parser.add_argument('--options', type=json.loads,
    #help='A dictionary to initialize from, obtained directly from the command line.' +
    #'If --c is provided, arguments here will override the config file parameters.')

    args = parser.parse_args()

    # Exactly one of --c / --problem must be given.
    # SystemExit is raised directly because the exit() helper is injected by the `site`
    # module and is not guaranteed to be available in every interpreter setup.
    if args.c is None and args.problem is None:
        print("Please provide either a config file or a problem name.")
        raise SystemExit(1)

    elif args.c is not None and args.problem is not None:
        print("Both a config and a problem name is provided. Please only provide one or the other.")
        raise SystemExit(1)

    ProblemInitializer(args.c, args.problem, args.path)
43 changes: 43 additions & 0 deletions miprometheus/problems/problem.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@

import signal
import torch
import os
import logging
import urllib
import time
import sys
import numpy as np
from torch.utils.data import Dataset

Expand Down Expand Up @@ -436,6 +440,45 @@ def curriculum_learning_update_params(self, episode):

return True

# Function to make check and download easier
def CheckAndDownload(self,filefoldertocheck, url='none', downloadname='~/data/downloaded'):
    """
    Checks whether a file or folder exists at the given path, otherwise downloads a file from the given URL.

    Both ``filefoldertocheck`` and ``downloadname`` are passed through ``os.path.expanduser``,
    so a leading ``~`` is resolved to the user's home directory. The directory that will hold
    the downloaded file is created if it does not exist yet.

    :param filefoldertocheck: Path to a file or folder to check to see if it exists.
    :type filefoldertocheck: string
    :param url: URL to download files from. The default 'none' is a placeholder and is only valid when nothing has to be downloaded.
    :type url: string
    :param downloadname: Path to store the downloaded file at. (DEFAULT: "~/data/downloaded").
    :type downloadname: string.

    :return: False if file was found, True if a download was necessary.

    :raises ValueError: if a download is necessary but no usable URL was provided.

    """
    # `import urllib` alone does not guarantee that the `urllib.request` submodule is
    # loaded, so import it explicitly where it is used.
    import urllib.request

    filefoldertocheck = os.path.expanduser(filefoldertocheck)
    if os.path.isfile(filefoldertocheck) or os.path.isdir(filefoldertocheck):
        print('Dataset found at {}'.format(filefoldertocheck))
        return False

    if url == 'none':
        # Fail with an explicit message instead of urllib's "unknown url type" error.
        raise ValueError(
            "'{}' was not found and no download URL was provided.".format(filefoldertocheck))

    target = os.path.expanduser(downloadname)
    target_dir = os.path.dirname(target)
    if target_dir:
        # urlretrieve opens the target for writing and fails if its folder is missing.
        os.makedirs(target_dir, exist_ok=True)

    print('Downloading {}'.format(url))
    urllib.request.urlretrieve(url, target, self.reporthook)
    return True

# Progress bar function
def reporthook(self,count, block_size, total_size):
    """
    Progress callback for ``urllib.request.urlretrieve``: writes a one-line progress report to stdout.

    The first call (``count == 0``) only records the start time in the module-level
    ``start_time`` variable; later calls overwrite the current terminal line with the
    percentage done, amount transferred, average speed and elapsed time.

    :param count: Number of blocks transferred so far.
    :type count: int
    :param block_size: Size of one block, in bytes.
    :type block_size: int
    :param total_size: Total size of the download in bytes; zero or negative when it is not known.
    :type total_size: int

    """
    global start_time
    # Also (re)start the clock if the hook is entered without a prior count == 0 call.
    if count == 0 or 'start_time' not in globals():
        start_time = time.time()
        return
    duration = time.time() - start_time
    progress_size = int(count * block_size)
    # A coarse clock can report zero elapsed time; avoid dividing by it.
    speed = int(progress_size / (1024 * duration)) if duration > 0 else 0
    if total_size > 0:
        # The last block is usually only partly filled, so cap the figure at 100%.
        percent = min(100, int(progress_size * 100 / total_size))
        sys.stdout.write("\r...%d%%, %d MB, %d KB/s, %d seconds passed" %
                         (percent, progress_size / (1024 * 1024), speed, duration))
    else:
        # Unknown total size: a percentage cannot be computed, report the rest.
        sys.stdout.write("\r...%d MB, %d KB/s, %d seconds passed" %
                         (progress_size / (1024 * 1024), speed, duration))
    sys.stdout.flush()


if __name__ == '__main__':
"""Unit test for Problem and DataDict"""
Expand Down
3 changes: 3 additions & 0 deletions miprometheus/problems/seq_to_seq/vqa/cog/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .cog import COG

__all__ = ['COG']
Loading