-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathemfac_postproc.py
116 lines (94 loc) · 4.55 KB
/
emfac_postproc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
USAGE = """
This script is to be run after EMFAC is completed (any number of times).
Run from the model run directory on M.
This script will find all the EMFAC output files and extract relevant information, writing a summary file
with all information included to OUTPUT\\emfac\\emfac_summary.csv
Columns are:
- analysis_type
- source
- emfac_version
- season
- sb375 = bool
- planning = bool
- run_timestamp
- Veh_Tech
- VMT
- Trips
- CO2_RUNEX
- CO2_IDLEX
- CO2_STREX
- CO2_TOTEX
"""
import os, pathlib, re, sys
import openpyxl
import pandas as pd
# -------------------------------------------------------------------
# Input/output file names and locations
# -------------------------------------------------------------------
CWD = pathlib.Path.cwd()
MODEL_RUN_ID = CWD.name # run this from M_DIR or model run dir on modeling machine
CWD_OUTPUT_EMFAC = CWD / "OUTPUT/emfac"
OUTPUT_FILE = CWD_OUTPUT_EMFAC / "emfac_summary.csv"
# columns to extract
EXTRACT_COLUMNS = ['Veh_Tech', 'VMT', 'Trips', 'CO2_RUNEX', 'CO2_IDLEX', 'CO2_STREX', 'CO2_TOTEX', 'PM2_5_TOTAL']
# emfac output file ends with timestamp: _YYYYMMDDHHMMSS.xlsx e.g. _20240410131422.xlsx
# by convention, it's E[emfacversion]_[model_run_id]_[season][_sb375?]_[YYYYMMDDHHMMSS].xlsx
EMFAC_OUTPUT_STR = f"E(?P<emfac_version>2014|2017|2017web|2021|2021web)_{MODEL_RUN_ID}_(?P<season>annual|summer|winter)(?P<planning>_planning)?(?P<sb375>_sb375)?_(?P<run_timestamp>\d{{14}}).xlsx"
EMFAC_OUTPUT_RE = re.compile(EMFAC_OUTPUT_STR)
if __name__ == '__main__':
print(f"MODEL_RUN_ID = {MODEL_RUN_ID}")
print(f"CWD_OUTPUT_EMFAC = {CWD_OUTPUT_EMFAC}")
print(f"EMFAC_OUTPUT_STR = {EMFAC_OUTPUT_STR}")
print("")
all_emfac_run_data = [] # list of dicts
emfac_xlsx_files = sorted(CWD_OUTPUT_EMFAC.glob("**/*.xlsx"))
for emfac_xlsx_file in emfac_xlsx_files:
relative_emfac_filepath = emfac_xlsx_file.relative_to(CWD_OUTPUT_EMFAC)
print(f"Checking {relative_emfac_filepath}")
match = EMFAC_OUTPUT_RE.match(emfac_xlsx_file.name)
if match == None:
print(" -> not an EMFAC output file")
continue
# fetch metadata
emfac_run_data = {
'analysis_type': 'unknown',
'source': str(emfac_xlsx_file),
}
if relative_emfac_filepath.parts[0] in ['SB375','EIR','AQConformity']:
emfac_run_data['analysis_type'] = relative_emfac_filepath.parts[0]
emfac_run_data['emfac_version'] = match.group('emfac_version')
emfac_run_data['season'] = match.group('season')
emfac_run_data['sb375' ] = True if match.group('sb375')=="_sb375" else False
emfac_run_data['planning' ] = True if match.group('planning')=="_planning" else False
emfac_run_data['run_timestamp'] = match.group('run_timestamp')
# Load workbook
workbook = openpyxl.load_workbook(filename=emfac_xlsx_file)
print(f" workbook.sheetnames:{workbook.sheetnames}")
if 'Total MTC' in workbook.sheetnames:
print(f" Reading 'Total MTC' worksheet")
total_mtc_df = pd.read_excel(emfac_xlsx_file, sheet_name="Total MTC")
# remove leading or trailing spaces
total_mtc_df.Veh_Tech = total_mtc_df.Veh_Tech.str.strip()
total_mtc_columns = total_mtc_df.columns
# print(f"total_mtc_columns = {total_mtc_columns}")
# select row with All Vehicles
total_mtc_df = total_mtc_df.loc[ total_mtc_df.Veh_Tech == 'All Vehicles', :]
assert(len(total_mtc_df)==1)
all_veh_series = total_mtc_df.squeeze()
# print(all_veh_series)
for extract_column in EXTRACT_COLUMNS:
# in EMFAC2021, VMT is 'Total_VMT'
emfac_extract_column = extract_column
if (match.group('emfac_version') in ['2021','2021web']) and (extract_column == 'VMT'):
emfac_extract_column = 'Total_VMT'
if emfac_extract_column in total_mtc_columns:
emfac_run_data[extract_column] = all_veh_series[emfac_extract_column]
all_emfac_run_data.append(emfac_run_data)
continue
if len(all_emfac_run_data) == 0:
print("Did not find any EMFAC output workbooks to process")
sys.exit(0)
# write it
all_emfac_run_data_df = pd.DataFrame(all_emfac_run_data)
all_emfac_run_data_df.to_csv(OUTPUT_FILE, index=False)
print(f"Wrote {len(all_emfac_run_data_df)} rows to {OUTPUT_FILE}")