diff --git a/README.md b/README.md index d904142..37845e2 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ pip install blobcity ``` Python import blobcity as bc -bc.train("data.csv", "Y_column") +bc.train(file="data.csv", target="Y_column") bc.spill("my_code.ipynb") ``` `Y_column` is the name of the target column. The column must be present within the data provided. @@ -21,6 +21,20 @@ Support input data formats are `.csv` and `.xlsx`. Extension for other file form The `spill` function generates the model code with exhaustive documentation. Training code is also included for basic scikit-learn models. TensorFlow and other DNN models produce only the test / final use code. +## Use a Pandas DataFrame +``` Python +bc.train(df=my_df, target="Y_column") +``` + +If loading data from a database or external system, create a DataFrame from your data source, and pass it directly to the `train` function. + +## From a URL +``` Python +bc.train(file="https://example.com/data.csv", target="Y_column") +``` + +The `file` parameter can be a local file or a URL. The function will load the data from the URL specified. The file at the URL must be in either CSV or XLSX format. The URL should be accessible publicly without authentication. + # Code Generation Multiple formats of code generation is supported by the framework. The `spill` function can be used to generate both `ipynb` and `py` files. The desired type is infered from the name of the output file. The code file will be created at the path specified. Relative or absolute file paths, are both supported. @@ -41,7 +55,7 @@ bc.spill("my_code.py", docs=True) ``` Pass the optional `docs` parameter to generate Python code along with full code documentation. -# Specifying `X_Values` +# Specifying Features Framework automatically performs a feature selection. All features are selected by default for feature selection. Framework is smart enough to remove ID / Primary key columns. 
@@ -51,3 +65,8 @@ Use the below code if you would like to manually specify the features to be used bc.train("data.csv", target="Y_value", features=["col1", "col2", "col3"]) ``` +# Printing Model Stats +``` Python +model = bc.train(file='./test.csv') +model.stats() +``` diff --git a/blobcity/main/driver.py b/blobcity/main/driver.py index 8202b74..585c04b 100644 --- a/blobcity/main/driver.py +++ b/blobcity/main/driver.py @@ -25,7 +25,7 @@ from blobcity.utils import AutoFeatureSelection as AFS from blobcity.utils import writeYml from blobcity.main.modelSelection import modelSearch -def train(file_path=None, df=None, target=None,features=None): +def train(file=None, df=None, target=None,features=None): # this should internally create and a yml file. The yml file is used for generating the code in the future. # this should also store a pickle / tensorflow file based on the model used # Data read @@ -33,8 +33,8 @@ def train(file_path=None, df=None, target=None,features=None): dc=DictClass() dc.resetVar() #data read - if file_path!=None: - dataframe= getDataFrameType(file_path, dc) + if file!=None: + dataframe= getDataFrameType(file, dc) else: dataframe = df dc.addKeyValue('data_read',{"class":"df"})