diff --git a/README.md b/README.md index a63125a..5f2c8a1 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ pip install cookiecutter cookiecutter https://github.com/NICD-UK/project-template ``` -You will be prompted for nine inputs: +You will be prompted for eleven inputs: 1. Project Name 2. Project Directory Name @@ -19,8 +19,9 @@ You will be prompted for nine inputs: 6. Project Sponsor Email 7. Project Summary 8. Raw Data Directory -9. `venv` Project (No / Yes) -10. `git` Project (No / Yes) +9. Language (Python / R) +10. `venv` Project (No / Yes) +11. `git` Project (No / Yes) ## Organization @@ -32,7 +33,6 @@ data/ ├─ model/ ├─ raw/ ├─ wrangle/ -notebooks/ reports/ ├─ clean/ ├─ final/ @@ -50,7 +50,7 @@ src/ - **Determine Objectives:** - **Determine Deliverables:** - **Determine Resources:** -- **Plan Project:** +- **Plan Project:** ### 2. Data Preparation and Understanding @@ -60,16 +60,16 @@ src/ ### 3. Prototyping -- **Develop Data Product** -- **Evaluate Data Product** -- **Approve Data Product** +- **Develop Data Product:** +- **Evaluate Data Product:** +- **Approve Data Product:** ### 4. 
Production -- **Deploy Data Product** -- **Monitor Data Product** -- **Maintain Data Product** -- **Close Project** +- **Deploy Data Product:** +- **Monitor Data Product:** +- **Maintain Data Product:** +- **Close Project:** ## Guide diff --git a/cookiecutter.json b/cookiecutter.json index 1688d25..d83e8a0 100644 --- a/cookiecutter.json +++ b/cookiecutter.json @@ -7,6 +7,7 @@ "project_sponsor_email": "Project Sponsor Email", "project_summary": "Project Summary", "raw_data_directory": "data/raw", + "language": ["Python", "R"], "venv_project": ["No", "Yes"], "git_project": ["No", "Yes"] } diff --git a/hooks/post_gen_project.py b/hooks/post_gen_project.py index 4c01e05..1e3f979 100644 --- a/hooks/post_gen_project.py +++ b/hooks/post_gen_project.py @@ -1,7 +1,21 @@ import subprocess +import glob +import os venv_project = "{{cookiecutter.venv_project}}" git_project = "{{cookiecutter.git_project}}" +language = "{{cookiecutter.language}}" + +# create Python project +if language == "Python": + os.remove("{{cookiecutter.project_directory_name}}.Rproj") + for file in glob.glob("**/*.Rmd", recursive=True): + os.remove(file) + +# create R project +if language == "R": + for file in glob.glob("**/*.py", recursive=True): + os.remove(file) # create venv project if venv_project == "Yes": diff --git a/{{cookiecutter.project_directory_name}}/notebooks/.gitkeep b/{{cookiecutter.project_directory_name}}/notebooks/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/{{cookiecutter.project_directory_name}}/reports/clean/clean.Rmd b/{{cookiecutter.project_directory_name}}/reports/clean/clean.Rmd new file mode 100644 index 0000000..1b398bf --- /dev/null +++ b/{{cookiecutter.project_directory_name}}/reports/clean/clean.Rmd @@ -0,0 +1,16 @@ +# Load Libraries +```{r message=FALSE} +library(glue) +library(here) +library(tidyverse) +``` + +# Setup +```{r} +data_name <- "" +``` + +# Read Data +```{r} +clean_data <- read_rds(here("data", "clean", glue("{data_name}.rds"))) +``` 
diff --git a/{{cookiecutter.project_directory_name}}/reports/clean/clean.py b/{{cookiecutter.project_directory_name}}/reports/clean/clean.py new file mode 100644 index 0000000..63614ea --- /dev/null +++ b/{{cookiecutter.project_directory_name}}/reports/clean/clean.py @@ -0,0 +1,10 @@ +#%% Load Libraries +import pandas +from pyprojroot import here +import os + +#%% Setup +data_name = "" + +#%% Read Data +clean_data = pandas.read_pickle(os.path.join(here(), "data", "clean", f"{data_name}.pkl")) diff --git a/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.Rmd b/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.Rmd new file mode 100644 index 0000000..567ba71 --- /dev/null +++ b/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.Rmd @@ -0,0 +1,16 @@ +# Load Libraries +```{r message=FALSE} +library(glue) +library(here) +library(tidyverse) +``` + +# Setup +```{r} +data_name <- "" +``` + +# Read Data +```{r} +wrangle_data <- read_rds(here("data", "wrangle", glue("{data_name}.rds"))) +``` diff --git a/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.py b/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.py new file mode 100644 index 0000000..ddc00f3 --- /dev/null +++ b/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.py @@ -0,0 +1,10 @@ +#%% Load Libraries +import pandas +from pyprojroot import here +import os + +#%% Setup +data_name = "" + +#%% Read Data +wrangle_data = pandas.read_pickle(os.path.join(here(), "data", "wrangle", f"{data_name}.pkl")) diff --git a/{{cookiecutter.project_directory_name}}/src/README.md b/{{cookiecutter.project_directory_name}}/src/README.md new file mode 100644 index 0000000..7638b90 --- /dev/null +++ b/{{cookiecutter.project_directory_name}}/src/README.md @@ -0,0 +1,28 @@ +# Transformation Checklist + +## Motivation + +## Cleaning Checklist + +For each data source: + +- [ ] read data from `/data/raw/` +- [ ] ... 
+- [ ] write data to `/data/clean/` + +## Wrangling Checklist + +For each data product: + +- [ ] read data from `/data/clean/` +- [ ] ... +- [ ] write data to `/data/wrangle/` + +## Processing + +For models: + + + + + diff --git a/{{cookiecutter.project_directory_name}}/src/clean/clean.Rmd b/{{cookiecutter.project_directory_name}}/src/clean/clean.Rmd new file mode 100644 index 0000000..9bc9069 --- /dev/null +++ b/{{cookiecutter.project_directory_name}}/src/clean/clean.Rmd @@ -0,0 +1,26 @@ +# Load Libraries +```{r message=FALSE} +library(glue) +library(here) +library(tidyverse) +``` + +# Setup +```{r} +data_name <- "" +``` + +# Read Data +```{r} +raw_data <- read_csv(here("data", "raw", glue("{data_name}.csv"))) +``` + +# Clean Data +```{r} +clean_data <- raw_data +``` + +# Write Data +```{r} +write_rds(clean_data, here("data", "clean", glue("{data_name}.rds"))) +``` diff --git a/{{cookiecutter.project_directory_name}}/src/clean/clean.py b/{{cookiecutter.project_directory_name}}/src/clean/clean.py new file mode 100644 index 0000000..2fa01e2 --- /dev/null +++ b/{{cookiecutter.project_directory_name}}/src/clean/clean.py @@ -0,0 +1,16 @@ +#%% Load Libraries +import pandas +from pyprojroot import here +import os + +#%% Setup +data_name = "" + +#%% Read Data +raw_data = pandas.read_csv(os.path.join(here(), "data", "raw", f"{data_name}.csv")) + +#%% Clean Data +clean_data = raw_data + +#%% Write Data +clean_data.to_pickle(os.path.join(here(), "data", "clean", f"{data_name}.pkl")) diff --git a/{{cookiecutter.project_directory_name}}/src/model/model.Rmd b/{{cookiecutter.project_directory_name}}/src/model/model.Rmd new file mode 100644 index 0000000..567ba71 --- /dev/null +++ b/{{cookiecutter.project_directory_name}}/src/model/model.Rmd @@ -0,0 +1,16 @@ +# Load Libraries +```{r message=FALSE} +library(glue) +library(here) +library(tidyverse) +``` + +# Setup +```{r} +data_name <- "" +``` + +# Read Data +```{r} +wrangle_data <- read_rds(here("data", "wrangle", 
glue("{data_name}.rds"))) +``` diff --git a/{{cookiecutter.project_directory_name}}/src/model/model.py b/{{cookiecutter.project_directory_name}}/src/model/model.py new file mode 100644 index 0000000..ddc00f3 --- /dev/null +++ b/{{cookiecutter.project_directory_name}}/src/model/model.py @@ -0,0 +1,10 @@ +#%% Load Libraries +import pandas +from pyprojroot import here +import os + +#%% Setup +data_name = "" + +#%% Read Data +wrangle_data = pandas.read_pickle(os.path.join(here(), "data", "wrangle", f"{data_name}.pkl")) diff --git a/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.Rmd b/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.Rmd new file mode 100644 index 0000000..ee8d924 --- /dev/null +++ b/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.Rmd @@ -0,0 +1,26 @@ +# Load Libraries +```{r message=FALSE} +library(glue) +library(here) +library(tidyverse) +``` + +# Setup +```{r} +data_name <- "" +``` + +# Read Data +```{r} +clean_data <- read_rds(here("data", "clean", glue("{data_name}.rds"))) +``` + +# Wrangle Data +```{r} +wrangle_data <- clean_data +``` + +# Write Data +```{r} +write_rds(wrangle_data, here("data", "wrangle", glue("{data_name}.rds"))) +``` diff --git a/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.py b/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.py new file mode 100644 index 0000000..cebc583 --- /dev/null +++ b/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.py @@ -0,0 +1,16 @@ +#%% Load Libraries +import pandas +from pyprojroot import here +import os + +#%% Setup +data_name = "" + +#%% Read Data +clean_data = pandas.read_pickle(os.path.join(here(), "data", "clean", f"{data_name}.pkl")) + +#%% Wrangle Data +wrangle_data = clean_data + +#%% Write Data +wrangle_data.to_pickle(os.path.join(here(), "data", "wrangle", f"{data_name}.pkl")) diff --git a/{{cookiecutter.project_directory_name}}/{{cookiecutter.project_directory_name}}.Rproj
b/{{cookiecutter.project_directory_name}}/{{cookiecutter.project_directory_name}}.Rproj new file mode 100644 index 0000000..02e3b31 --- /dev/null +++ b/{{cookiecutter.project_directory_name}}/{{cookiecutter.project_directory_name}}.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: No +SaveWorkspace: No +AlwaysSaveHistory: No + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX \ No newline at end of file