From 3a6645a8cbf759b9c249a7062fdaf4178249fdf4 Mon Sep 17 00:00:00 2001 From: Matthew Edwards Date: Wed, 18 Jan 2023 14:42:47 +0000 Subject: [PATCH 01/11] draft cleaning checklist --- README.md | 18 +++++------ .../src/README.md | 31 +++++++++++++++++++ 2 files changed, 40 insertions(+), 9 deletions(-) create mode 100644 {{cookiecutter.project_directory_name}}/src/README.md diff --git a/README.md b/README.md index a63125a..a5037ec 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ pip install cookiecutter cookiecutter https://github.com/NICD-UK/project-template ``` -You will be prompted for nine inputs: +You will be prompted for ten inputs: 1. Project Name 2. Project Directory Name @@ -50,7 +50,7 @@ src/ - **Determine Objectives:** - **Determine Deliverables:** - **Determine Resources:** -- **Plan Project:** +- **Plan Project:** ### 2. Data Preparation and Understanding @@ -60,16 +60,16 @@ src/ ### 3. Prototyping -- **Develop Data Product** -- **Evaluate Data Product** -- **Approve Data Product** +- **Develop Data Product:** +- **Evaluate Data Product:** +- **Approve Data Product:** ### 4. Production -- **Deploy Data Product** -- **Monitor Data Product** -- **Maintain Data Product** -- **Close Project** +- **Deploy Data Product:** +- **Monitor Data Product:** +- **Maintain Data Product:** +- **Close Project:** ## Guide diff --git a/{{cookiecutter.project_directory_name}}/src/README.md b/{{cookiecutter.project_directory_name}}/src/README.md new file mode 100644 index 0000000..978dcfb --- /dev/null +++ b/{{cookiecutter.project_directory_name}}/src/README.md @@ -0,0 +1,31 @@ +# Transformation Checklist + +## Motivation + +## Cleaning Checklist + +For each data source: + +- [ ] import data from `/data/raw/` +- [ ] column names +- [ ] missing values +- [ ] column types +- [ ] quality check +- [ ] write data to `/data/clean/` + +## Wrangling Checklist + +For each data product: + +- [ ] read data from `/data/clean/` +- [ ] ... 
+- [ ] write data to `/data/wrangle/` + +## Processing + +For models: + + + + + From bb84cc4161b3e91d3a1a4a46e6181b8a6ff728ef Mon Sep 17 00:00:00 2001 From: Matthew Edwards Date: Wed, 25 Jan 2023 13:57:32 +0000 Subject: [PATCH 02/11] simplified checklists --- {{cookiecutter.project_directory_name}}/src/README.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/{{cookiecutter.project_directory_name}}/src/README.md b/{{cookiecutter.project_directory_name}}/src/README.md index 978dcfb..cf883ea 100644 --- a/{{cookiecutter.project_directory_name}}/src/README.md +++ b/{{cookiecutter.project_directory_name}}/src/README.md @@ -7,10 +7,7 @@ For each data source: - [ ] import data from `/data/raw/` -- [ ] column names -- [ ] missing values -- [ ] column types -- [ ] quality check +- [ ] ... - [ ] write data to `/data/clean/` ## Wrangling Checklist From 6ce418b75095661c54ec22b7ee11c824fc3f89ed Mon Sep 17 00:00:00 2001 From: Matthew Edwards Date: Wed, 25 Jan 2023 14:27:09 +0000 Subject: [PATCH 03/11] added clean R template script --- .../config.yml | 1 + .../src/clean/clean.Rmd | 28 +++++++++++++++++++ .../src/model/model.Rmd | 0 .../src/wrangle/wrangle.Rmd | 0 4 files changed, 29 insertions(+) create mode 100644 {{cookiecutter.project_directory_name}}/src/clean/clean.Rmd create mode 100644 {{cookiecutter.project_directory_name}}/src/model/model.Rmd create mode 100644 {{cookiecutter.project_directory_name}}/src/wrangle/wrangle.Rmd diff --git a/{{cookiecutter.project_directory_name}}/config.yml b/{{cookiecutter.project_directory_name}}/config.yml index bd87759..06254f5 100644 --- a/{{cookiecutter.project_directory_name}}/config.yml +++ b/{{cookiecutter.project_directory_name}}/config.yml @@ -1,2 +1,3 @@ default: raw_data_directory: {{cookiecutter.raw_data_directory}} + raw_data_name: "sample" \ No newline at end of file diff --git a/{{cookiecutter.project_directory_name}}/src/clean/clean.Rmd b/{{cookiecutter.project_directory_name}}/src/clean/clean.Rmd new 
file mode 100644 index 0000000..74230ad --- /dev/null +++ b/{{cookiecutter.project_directory_name}}/src/clean/clean.Rmd @@ -0,0 +1,28 @@ +# Load Libraries +```{r message=FALSE} +library(tidyverse) +library(janitor) +library(here) +library(glue)= +``` + +# Setup +```{r} +data_name <- config::get("raw_data_name") +``` + +# Read Data +```{r} +raw_data <- read_csv(here("data", "raw", glue("{data_name}-data.csv"))) +``` + +# Clean Data +```{r} +clean_data <- raw_data |> + clean_names() +``` + +# Write data +```{r} +write_rds(clean_data, here("data", "clean", glue("cleaned-{data_name}-data.rds"))) +``` \ No newline at end of file diff --git a/{{cookiecutter.project_directory_name}}/src/model/model.Rmd b/{{cookiecutter.project_directory_name}}/src/model/model.Rmd new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.Rmd b/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.Rmd new file mode 100644 index 0000000..e69de29 From 0a9586abb77a37b71a7a21c74306149cc08385b3 Mon Sep 17 00:00:00 2001 From: Matthew Edwards Date: Wed, 25 Jan 2023 14:42:36 +0000 Subject: [PATCH 04/11] clean wrangle and model template scripts --- .../config.yml | 1 - .../src/clean/clean.Rmd | 18 ++++++------- .../src/model/model.Rmd | 16 ++++++++++++ .../src/wrangle/wrangle.Rmd | 26 +++++++++++++++++++ 4 files changed, 50 insertions(+), 11 deletions(-) diff --git a/{{cookiecutter.project_directory_name}}/config.yml b/{{cookiecutter.project_directory_name}}/config.yml index 06254f5..bd87759 100644 --- a/{{cookiecutter.project_directory_name}}/config.yml +++ b/{{cookiecutter.project_directory_name}}/config.yml @@ -1,3 +1,2 @@ default: raw_data_directory: {{cookiecutter.raw_data_directory}} - raw_data_name: "sample" \ No newline at end of file diff --git a/{{cookiecutter.project_directory_name}}/src/clean/clean.Rmd b/{{cookiecutter.project_directory_name}}/src/clean/clean.Rmd index 74230ad..d903493 100644 --- 
a/{{cookiecutter.project_directory_name}}/src/clean/clean.Rmd +++ b/{{cookiecutter.project_directory_name}}/src/clean/clean.Rmd @@ -1,28 +1,26 @@ # Load Libraries ```{r message=FALSE} -library(tidyverse) -library(janitor) +library(glue) library(here) -library(glue)= +library(tidyverse) ``` # Setup ```{r} -data_name <- config::get("raw_data_name") +data_name <- "example" ``` # Read Data ```{r} -raw_data <- read_csv(here("data", "raw", glue("{data_name}-data.csv"))) +raw_data <- read_csv(here("data", "raw", glue("raw-{data_name}-data.csv"))) ``` # Clean Data ```{r} -clean_data <- raw_data |> - clean_names() +clean_data <- raw_data ``` -# Write data +# Write Data ```{r} -write_rds(clean_data, here("data", "clean", glue("cleaned-{data_name}-data.rds"))) -``` \ No newline at end of file +write_rds(clean_data, here("data", "clean", glue("clean-{data_name}-data.rds"))) +``` diff --git a/{{cookiecutter.project_directory_name}}/src/model/model.Rmd b/{{cookiecutter.project_directory_name}}/src/model/model.Rmd index e69de29..b054900 100644 --- a/{{cookiecutter.project_directory_name}}/src/model/model.Rmd +++ b/{{cookiecutter.project_directory_name}}/src/model/model.Rmd @@ -0,0 +1,16 @@ +# Load Libraries +```{r message=FALSE} +library(glue) +library(here) +library(tidyverse) +``` + +# Setup +```{r} +data_name <- "example" +``` + +# Read Data +```{r} +wrangle_data <- read_rds(here("data", "wrangle", glue("wrangle-{data_name}-data.rds"))) +``` diff --git a/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.Rmd b/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.Rmd index e69de29..ae085a6 100644 --- a/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.Rmd +++ b/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.Rmd @@ -0,0 +1,26 @@ +# Load Libraries +```{r message=FALSE} +library(glue) +library(here) +library(tidyverse) +``` + +# Setup +```{r} +data_name <- "example" +``` + +# Read Data +```{r} +clean_data <- read_rds(here("data", "clean", 
glue("clean-{data_name}-data.rds"))) +``` + +# Wrangle Data +```{r} +wrangle_data <- clean_data +``` + +# Write Data +```{r} +write_rds(wrangle_data, here("data", "wrangle", glue("wrangle-{data_name}-data.rds"))) +``` \ No newline at end of file From 2355429e73eb7e582da794b27c0759a884a59fc3 Mon Sep 17 00:00:00 2001 From: mt-edwards <38671647+mt-edwards@users.noreply.github.com> Date: Wed, 25 Jan 2023 14:44:35 +0000 Subject: [PATCH 05/11] Update README.md changed import to read in the checklist --- {{cookiecutter.project_directory_name}}/src/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/{{cookiecutter.project_directory_name}}/src/README.md b/{{cookiecutter.project_directory_name}}/src/README.md index cf883ea..7638b90 100644 --- a/{{cookiecutter.project_directory_name}}/src/README.md +++ b/{{cookiecutter.project_directory_name}}/src/README.md @@ -6,7 +6,7 @@ For each data source: -- [ ] import data from `/data/raw/` +- [ ] read data from `/data/raw/` - [ ] ... 
- [ ] write data to `/data/clean/` From 713590b5f0c39b7f3230f15d5cc7f783b0d0cfdc Mon Sep 17 00:00:00 2001 From: Matthew Edwards Date: Wed, 25 Jan 2023 17:23:50 +0000 Subject: [PATCH 06/11] simplified data names and added exploration and description report templates --- .../reports/clean/clean.Rmd | 22 +++++++++++++++++++ .../reports/wrangle/wrangle.Rmd | 22 +++++++++++++++++++ .../src/clean/clean.Rmd | 4 ++-- .../src/model/model.Rmd | 2 +- .../src/wrangle/wrangle.Rmd | 6 ++--- 5 files changed, 50 insertions(+), 6 deletions(-) create mode 100644 {{cookiecutter.project_directory_name}}/reports/clean/clean.Rmd create mode 100644 {{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.Rmd diff --git a/{{cookiecutter.project_directory_name}}/reports/clean/clean.Rmd b/{{cookiecutter.project_directory_name}}/reports/clean/clean.Rmd new file mode 100644 index 0000000..172bc6e --- /dev/null +++ b/{{cookiecutter.project_directory_name}}/reports/clean/clean.Rmd @@ -0,0 +1,22 @@ +# Load Libraries +```{r message=FALSE} +library(dlookr) +library(glue) +library(here) +library(tidyverse) +``` + +# Setup +```{r} +data_name <- "example" +``` + +# Read Data +```{r} +clean_data <- read_rds(here("data", "clean", glue("{data_name}.rds"))) +``` + +# Describe Data +```{r} +diagnose_web_report(clean_data) +``` diff --git a/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.Rmd b/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.Rmd new file mode 100644 index 0000000..41beac3 --- /dev/null +++ b/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.Rmd @@ -0,0 +1,22 @@ +# Load Libraries +```{r message=FALSE} +library(dlookr) +library(glue) +library(here) +library(tidyverse) +``` + +# Setup +```{r} +data_name <- "example" +``` + +# Read Data +```{r} +wrangle_data <- read_rds(here("data", "wrangle", glue("{data_name}.rds"))) +``` + +# Explore Data +```{r} +diagnose_web_report(wrangle_data) +``` diff --git 
a/{{cookiecutter.project_directory_name}}/src/clean/clean.Rmd b/{{cookiecutter.project_directory_name}}/src/clean/clean.Rmd index d903493..e1f8b47 100644 --- a/{{cookiecutter.project_directory_name}}/src/clean/clean.Rmd +++ b/{{cookiecutter.project_directory_name}}/src/clean/clean.Rmd @@ -12,7 +12,7 @@ data_name <- "example" # Read Data ```{r} -raw_data <- read_csv(here("data", "raw", glue("raw-{data_name}-data.csv"))) +raw_data <- read_csv(here("data", "raw", glue("{data_name}.csv"))) ``` # Clean Data @@ -22,5 +22,5 @@ clean_data <- raw_data # Write Data ```{r} -write_rds(clean_data, here("data", "clean", glue("clean-{data_name}-data.rds"))) +write_rds(clean_data, here("data", "clean", glue("{data_name}.rds"))) ``` diff --git a/{{cookiecutter.project_directory_name}}/src/model/model.Rmd b/{{cookiecutter.project_directory_name}}/src/model/model.Rmd index b054900..5de0c89 100644 --- a/{{cookiecutter.project_directory_name}}/src/model/model.Rmd +++ b/{{cookiecutter.project_directory_name}}/src/model/model.Rmd @@ -12,5 +12,5 @@ data_name <- "example" # Read Data ```{r} -wrangle_data <- read_rds(here("data", "wrangle", glue("wrangle-{data_name}-data.rds"))) +wrangle_data <- read_rds(here("data", "wrangle", glue("{data_name}.rds"))) ``` diff --git a/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.Rmd b/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.Rmd index ae085a6..652ee88 100644 --- a/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.Rmd +++ b/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.Rmd @@ -12,7 +12,7 @@ data_name <- "example" # Read Data ```{r} -clean_data <- read_rds(here("data", "clean", glue("clean-{data_name}-data.rds"))) +clean_data <- read_rds(here("data", "clean", glue("{data_name}.rds"))) ``` # Wrangle Data @@ -22,5 +22,5 @@ wrangle_data <- clean_data # Write Data ```{r} -write_rds(wrangle_data, here("data", "wrangle", glue("wrangle-{data_name}-data.rds"))) -``` \ No newline at end of file 
+write_rds(wrangle_data, here("data", "wrangle", glue("{data_name}.rds"))) +``` From 4fe339c0a71728b1c50372f4ba2b2016d2a52e5c Mon Sep 17 00:00:00 2001 From: Matthew Edwards Date: Thu, 2 Feb 2023 14:48:36 +0000 Subject: [PATCH 07/11] empty python scripts added and posthook script modified to remove unneeded scripts --- cookiecutter.json | 1 + hooks/post_gen_project.py | 8 ++++++++ .../reports/clean/clean.Rmd | 8 +------- .../reports/clean/clean.py | 0 .../reports/wrangle/wrangle.Rmd | 8 +------- .../reports/wrangle/wrangle.py | 0 .../src/clean/clean.Rmd | 2 +- .../src/clean/clean.py | 0 .../src/model/model.Rmd | 2 +- .../src/model/model.py | 0 .../src/wrangle/wrangle.Rmd | 2 +- .../src/wrangle/wrangle.py | 0 .../{{cookiecutter.project_directory_name}}.Rproj | 13 +++++++++++++ 13 files changed, 27 insertions(+), 17 deletions(-) create mode 100644 {{cookiecutter.project_directory_name}}/reports/clean/clean.py create mode 100644 {{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.py create mode 100644 {{cookiecutter.project_directory_name}}/src/clean/clean.py create mode 100644 {{cookiecutter.project_directory_name}}/src/model/model.py create mode 100644 {{cookiecutter.project_directory_name}}/src/wrangle/wrangle.py create mode 100644 {{cookiecutter.project_directory_name}}/{{cookiecutter.project_directory_name}}.Rproj diff --git a/cookiecutter.json b/cookiecutter.json index 1688d25..c30c441 100644 --- a/cookiecutter.json +++ b/cookiecutter.json @@ -7,6 +7,7 @@ "project_sponsor_email": "Project Sponsor Email", "project_summary": "Project Summary", "raw_data_directory": "data/raw", + "lang_project": ["Python", "R"], "venv_project": ["No", "Yes"], "git_project": ["No", "Yes"] } diff --git a/hooks/post_gen_project.py b/hooks/post_gen_project.py index 4c01e05..b7ecb7c 100644 --- a/hooks/post_gen_project.py +++ b/hooks/post_gen_project.py @@ -2,6 +2,14 @@ venv_project = "{{cookiecutter.venv_project}}" git_project = "{{cookiecutter.git_project}}" +lang_project = 
"{{cookiecutter.lang_projecdt}}" + +# create lang project +if lang_project == "Python": + subprocess.run(["rm", "{reports/src}/**/*.Rmd"], stdout=subprocess.DEVNULL) + subprocess.run(["rm", "{{cookiecutter.project_directory_name}}.Rproj"], stdout=subprocess.DEVNULL) +elif lang_project == "R": + subprocess.run(["rm", "{reports/src}/**/*.py"], stdout=subprocess.DEVNULL) # create venv project if venv_project == "Yes": diff --git a/{{cookiecutter.project_directory_name}}/reports/clean/clean.Rmd b/{{cookiecutter.project_directory_name}}/reports/clean/clean.Rmd index 172bc6e..1b398bf 100644 --- a/{{cookiecutter.project_directory_name}}/reports/clean/clean.Rmd +++ b/{{cookiecutter.project_directory_name}}/reports/clean/clean.Rmd @@ -1,6 +1,5 @@ # Load Libraries ```{r message=FALSE} -library(dlookr) library(glue) library(here) library(tidyverse) @@ -8,15 +7,10 @@ library(tidyverse) # Setup ```{r} -data_name <- "example" +data_name <- "" ``` # Read Data ```{r} clean_data <- read_rds(here("data", "clean", glue("{data_name}.rds"))) ``` - -# Describe Data -```{r} -diagnose_web_report(clean_data) -``` diff --git a/{{cookiecutter.project_directory_name}}/reports/clean/clean.py b/{{cookiecutter.project_directory_name}}/reports/clean/clean.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.Rmd b/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.Rmd index 41beac3..567ba71 100644 --- a/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.Rmd +++ b/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.Rmd @@ -1,6 +1,5 @@ # Load Libraries ```{r message=FALSE} -library(dlookr) library(glue) library(here) library(tidyverse) @@ -8,15 +7,10 @@ library(tidyverse) # Setup ```{r} -data_name <- "example" +data_name <- "" ``` # Read Data ```{r} wrangle_data <- read_rds(here("data", "wrangle", glue("{data_name}.rds"))) ``` - -# Explore Data -```{r} -diagnose_web_report(wrangle_data) -``` 
diff --git a/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.py b/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.project_directory_name}}/src/clean/clean.Rmd b/{{cookiecutter.project_directory_name}}/src/clean/clean.Rmd index e1f8b47..9bc9069 100644 --- a/{{cookiecutter.project_directory_name}}/src/clean/clean.Rmd +++ b/{{cookiecutter.project_directory_name}}/src/clean/clean.Rmd @@ -7,7 +7,7 @@ library(tidyverse) # Setup ```{r} -data_name <- "example" +data_name <- "" ``` # Read Data diff --git a/{{cookiecutter.project_directory_name}}/src/clean/clean.py b/{{cookiecutter.project_directory_name}}/src/clean/clean.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.project_directory_name}}/src/model/model.Rmd b/{{cookiecutter.project_directory_name}}/src/model/model.Rmd index 5de0c89..567ba71 100644 --- a/{{cookiecutter.project_directory_name}}/src/model/model.Rmd +++ b/{{cookiecutter.project_directory_name}}/src/model/model.Rmd @@ -7,7 +7,7 @@ library(tidyverse) # Setup ```{r} -data_name <- "example" +data_name <- "" ``` # Read Data diff --git a/{{cookiecutter.project_directory_name}}/src/model/model.py b/{{cookiecutter.project_directory_name}}/src/model/model.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.Rmd b/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.Rmd index 652ee88..ee8d924 100644 --- a/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.Rmd +++ b/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.Rmd @@ -7,7 +7,7 @@ library(tidyverse) # Setup ```{r} -data_name <- "example" +data_name <- "" ``` # Read Data diff --git a/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.py b/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.py new file mode 100644 index 0000000..e69de29 diff --git 
a/{{cookiecutter.project_directory_name}}/{{cookiecutter.project_directory_name}}.Rproj b/{{cookiecutter.project_directory_name}}/{{cookiecutter.project_directory_name}}.Rproj new file mode 100644 index 0000000..02e3b31 --- /dev/null +++ b/{{cookiecutter.project_directory_name}}/{{cookiecutter.project_directory_name}}.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: No +SaveWorkspace: No +AlwaysSaveHistory: No + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX \ No newline at end of file From 8ce9482bb21dc70f091856861097167cb78c1214 Mon Sep 17 00:00:00 2001 From: Matthew Edwards Date: Thu, 2 Feb 2023 15:25:29 +0000 Subject: [PATCH 08/11] fixed the post_hook to remove .Rproj file when an R project is required --- hooks/post_gen_project.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/hooks/post_gen_project.py b/hooks/post_gen_project.py index b7ecb7c..b29593f 100644 --- a/hooks/post_gen_project.py +++ b/hooks/post_gen_project.py @@ -1,15 +1,21 @@ import subprocess +import glob +import os venv_project = "{{cookiecutter.venv_project}}" git_project = "{{cookiecutter.git_project}}" -lang_project = "{{cookiecutter.lang_projecdt}}" +lang_project = "{{cookiecutter.lang_project}}" -# create lang project +# create Python project if lang_project == "Python": - subprocess.run(["rm", "{reports/src}/**/*.Rmd"], stdout=subprocess.DEVNULL) - subprocess.run(["rm", "{{cookiecutter.project_directory_name}}.Rproj"], stdout=subprocess.DEVNULL) -elif lang_project == "R": - subprocess.run(["rm", "{reports/src}/**/*.py"], stdout=subprocess.DEVNULL) + os.remove("{{cookiecutter.project_directory_name}}.Rproj") + for file in glob.glob("**/*.Rmd", recursive=True): + os.remove(file) + +# create R project +if lang_project == "R": + for file in glob.glob("**/*.py", recursive=True): + os.remove(file) # create venv project if venv_project == "Yes": From 
6da4e8699f7877151c0d3493e41790fb29818488 Mon Sep 17 00:00:00 2001 From: Matthew Edwards Date: Thu, 2 Feb 2023 15:31:13 +0000 Subject: [PATCH 09/11] removed notebooks folder and changed lang_project to language in the json config file --- README.md | 6 +++--- cookiecutter.json | 2 +- hooks/post_gen_project.py | 6 +++--- {{cookiecutter.project_directory_name}}/notebooks/.gitkeep | 0 4 files changed, 7 insertions(+), 7 deletions(-) delete mode 100644 {{cookiecutter.project_directory_name}}/notebooks/.gitkeep diff --git a/README.md b/README.md index a5037ec..de7ec4a 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,9 @@ You will be prompted for ten inputs: 6. Project Sponsor Email 7. Project Summary 8. Raw Data Directory -9. `venv` Project (No / Yes) -10. `git` Project (No / Yes) +9. Language (Python / R) +10. `venv` Project (No / Yes) +11. `git` Project (No / Yes) ## Organization @@ -32,7 +33,6 @@ data/ ├─ model/ ├─ raw/ ├─ wrangle/ -notebooks/ reports/ ├─ clean/ ├─ final/ diff --git a/cookiecutter.json b/cookiecutter.json index c30c441..d83e8a0 100644 --- a/cookiecutter.json +++ b/cookiecutter.json @@ -7,7 +7,7 @@ "project_sponsor_email": "Project Sponsor Email", "project_summary": "Project Summary", "raw_data_directory": "data/raw", - "lang_project": ["Python", "R"], + "language": ["Python", "R"], "venv_project": ["No", "Yes"], "git_project": ["No", "Yes"] } diff --git a/hooks/post_gen_project.py b/hooks/post_gen_project.py index b29593f..1e3f979 100644 --- a/hooks/post_gen_project.py +++ b/hooks/post_gen_project.py @@ -4,16 +4,16 @@ venv_project = "{{cookiecutter.venv_project}}" git_project = "{{cookiecutter.git_project}}" -lang_project = "{{cookiecutter.lang_project}}" +language = "{{cookiecutter.language}}" # create Python project -if lang_project == "Python": +if language == "Python": os.remove("{{cookiecutter.project_directory_name}}.Rproj") for file in glob.glob("**/*.Rmd", recursive=True): os.remove(file) # create R project -if lang_project == "R": +if 
language == "R": for file in glob.glob("**/*.py", recursive=True): os.remove(file) diff --git a/{{cookiecutter.project_directory_name}}/notebooks/.gitkeep b/{{cookiecutter.project_directory_name}}/notebooks/.gitkeep deleted file mode 100644 index e69de29..0000000 From b40f29ce17c826d64c9158b12ef31220e046fce8 Mon Sep 17 00:00:00 2001 From: Matthew Edwards Date: Thu, 2 Feb 2023 16:09:11 +0000 Subject: [PATCH 10/11] added python template scripts --- .../reports/clean/clean.py | 10 ++++++++++ .../reports/wrangle/wrangle.py | 10 ++++++++++ .../src/clean/clean.py | 16 ++++++++++++++++ .../src/model/model.py | 10 ++++++++++ .../src/wrangle/wrangle.py | 16 ++++++++++++++++ 5 files changed, 62 insertions(+) diff --git a/{{cookiecutter.project_directory_name}}/reports/clean/clean.py b/{{cookiecutter.project_directory_name}}/reports/clean/clean.py index e69de29..63614ea 100644 --- a/{{cookiecutter.project_directory_name}}/reports/clean/clean.py +++ b/{{cookiecutter.project_directory_name}}/reports/clean/clean.py @@ -0,0 +1,10 @@ +#%% Load Libraries +import pandas +from pyprojroot import here +import os + +#%% Setup +data_name = "" + +#%% Read Data +clean_data = pandas.read_pickle(os.path.join(here(), "data", "clean", f"{data_name}.pkl")) diff --git a/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.py b/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.py index e69de29..ddc00f3 100644 --- a/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.py +++ b/{{cookiecutter.project_directory_name}}/reports/wrangle/wrangle.py @@ -0,0 +1,10 @@ +#%% Load Libraries +import pandas +from pyprojroot import here +import os + +#%% Setup +data_name = "" + +#%% Read Data +wrangle_data = pandas.read_pickle(os.path.join(here(), "data", "wrangle", f"{data_name}.pkl")) diff --git a/{{cookiecutter.project_directory_name}}/src/clean/clean.py b/{{cookiecutter.project_directory_name}}/src/clean/clean.py index e69de29..2fa01e2 100644 --- 
a/{{cookiecutter.project_directory_name}}/src/clean/clean.py +++ b/{{cookiecutter.project_directory_name}}/src/clean/clean.py @@ -0,0 +1,16 @@ +#%% Load Libraries +import pandas +from pyprojroot import here +import os + +#%% Setup +data_name = "" + +#%% Read Data +raw_data = pandas.read_csv(os.path.join(here(), "data", "raw", f"{data_name}.csv")) + +#%% Clean Data +clean_data = raw_data + +#%% Write Data +clean_data.to_pickle(os.path.join(here(), "data", "clean", f"{data_name}.pkl")) diff --git a/{{cookiecutter.project_directory_name}}/src/model/model.py b/{{cookiecutter.project_directory_name}}/src/model/model.py index e69de29..ddc00f3 100644 --- a/{{cookiecutter.project_directory_name}}/src/model/model.py +++ b/{{cookiecutter.project_directory_name}}/src/model/model.py @@ -0,0 +1,10 @@ +#%% Load Libraries +import pandas +from pyprojroot import here +import os + +#%% Setup +data_name = "" + +#%% Read Data +wrangle_data = pandas.read_pickle(os.path.join(here(), "data", "wrangle", f"{data_name}.pkl")) diff --git a/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.py b/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.py index e69de29..cebc583 100644 --- a/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.py +++ b/{{cookiecutter.project_directory_name}}/src/wrangle/wrangle.py @@ -0,0 +1,16 @@ +#%% Load Libraries +import pandas +from pyprojroot import here +import os + +#%% Setup +data_name = "" + +#%% Read Data +clean_data = pandas.read_pickle(os.path.join(here(), "data", "clean", f"{data_name}.pkl")) + +#%% Wrangle Data +wrangle_data = clean_data + +#%% Write Data +wrangle_data.to_pickle(os.path.join(here(), "data", "wrangle", f"{data_name}.pkl")) From 965fc1b645925305b398f082dc2f685fb88b0506 Mon Sep 17 00:00:00 2001 From: Matthew Edwards Date: Thu, 2 Feb 2023 16:11:41 +0000 Subject: [PATCH 11/11] updated the number of inputs in the README file --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md 
b/README.md index de7ec4a..5f2c8a1 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ pip install cookiecutter cookiecutter https://github.com/NICD-UK/project-template ``` -You will be prompted for ten inputs: +You will be prompted for eleven inputs: 1. Project Name 2. Project Directory Name