Misc improvements (#180)
olejandro authored Feb 13, 2024
1 parent 8d066d0 commit 3720c7e
Showing 4 changed files with 61 additions and 69 deletions.
4 changes: 2 additions & 2 deletions xl2times/config/times_mapping.txt
@@ -2,7 +2,7 @@ ALL_REG[ALL_REG] = AllRegions(Region)
 ALL_TS[ALL_TS] = TimeSlices(TS)
 B[DATAYEAR,VALUE] = TimePeriods(Year,B)
 COM[COM] = Commodities(Commodity)
-COM_DESC[REG,COM,TEXT] = Commodities(Region,Commodity,CommDesc)
+COM_DESC[REG,COM,TEXT] = Commodities(Region,Commodity,Description)
 COM_GMAP[REG,COM_GRP,COM] = CommodityGroupMap(Region,CommodityGroup,Commodity)
 COM_GRP[COM_GRP] = CommodityGroups(CommodityGroup)
 COM_LIM[REG,COM,BD] = Commodities(Region,Commodity,LimType)
@@ -22,7 +22,7 @@ NRG_TMAP[REG,NRG_TYPE,COM] = Commodities(Region,Ctype,Commodity)
 PASTYEAR[DATAYEAR,TEXT] = PastYears(Year,Year)
 PRC[PRC] = Processes(Process)
 PRC_ACTUNT[REG,PRC,COM_GRP,UNITS] = Processes(Region,Process,PrimaryCG,Tact)
-PRC_DESC[REG,PRC,TEXT] = Processes(Region,Process,TechDesc)
+PRC_DESC[REG,PRC,TEXT] = Processes(Region,Process,Description)
 PRC_DSCNCAP[REG,PRC] = Attributes(Region,Process, Attribute:NCAP_DISC)
 PRC_MAP[REG,PRC_GRP,PRC] = Processes(Region,Sets,Process)
 PRC_TSL[REG,PRC,TSLVL] = Processes(Region,Process,Tslvl)
4 changes: 2 additions & 2 deletions xl2times/config/veda-tags.json
@@ -82,7 +82,7 @@
       "aliases": [
         "description"
       ],
-      "use_name": "commdesc",
+      "use_name": "description",
       "row_ignore_symbol": [
         "\\I:",
         "*"
@@ -241,7 +241,7 @@
       "aliases": [
         "description"
       ],
-      "use_name": "techdesc",
+      "use_name": "description",
       "row_ignore_symbol": [
         "\\I:",
         "*"
111 changes: 54 additions & 57 deletions xl2times/transforms.py
@@ -306,7 +306,7 @@ def process_flexible_import_tables(
         ),
         "region": model.internal_regions,
         "currency": utils.single_column(tables, datatypes.Tag.currencies, "currency"),
-        "other_indexes": {"INPUT", "OUTPUT"},
+        "other_indexes": {"INPUT", "OUTPUT", "DEMO", "DEMI"},
     }
 
     def get_colname(value):
@@ -1220,7 +1220,7 @@ def expand_pcg_from_suffix(df):
         "sets",
         "region",
         "process",
-        "techdesc",
+        "description",
         "tact",
         "tcap",
         "tslvl",
@@ -1478,7 +1478,7 @@ def generate_dummy_processes(
 
     process_declarations = pd.DataFrame(
         dummy_processes,
-        columns=["sets", "process", "techdesc", "tact", "tcap", "primarycg"],
+        columns=["sets", "process", "description", "tact", "tcap", "primarycg"],
     )
 
     tables.append(
@@ -1492,7 +1492,7 @@
         )
     )
 
-    process_data_specs = process_declarations[["process", "techdesc"]].copy()
+    process_data_specs = process_declarations[["process", "description"]].copy()
     # Use this as default activity cost for dummy processes
     # TODO: Should this be included in settings instead?
     process_data_specs["ACTCOST"] = 1111
@@ -1830,7 +1830,7 @@ def get_matching_processes(row, dictionary):
     ]:
         if row[col] is not None:
             matching_processes = intersect(
-                matching_processes, filter_by_pattern(dictionary[key], row[col])
+                matching_processes, filter_by_pattern(dictionary[key], row[col].upper())
             )
     if matching_processes is not None and any(matching_processes.duplicated()):
         raise ValueError("duplicated")
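
Note: the .upper() calls here and in get_matching_commodities below make wildcard matching case-insensitive: the lookup tables are now indexed in upper case by df_indexed_by_col (added further down in this file), so patterns must be uppercased to match. A minimal sketch of the mechanism, using an invented lookup table rather than the repo's own data:

import pandas as pd

# Hypothetical lookup table whose index is uppercased, as df_indexed_by_col now does
processes = pd.DataFrame(
    {"process": ["coal_pp1", "coal_pp2"]}, index=["COAL_PP1", "COAL_PP2"]
).rename_axis("index")

# A lower-case user pattern still matches once it is uppercased as well
regexp = "^" + "coal_*".upper().replace("*", ".*") + "$"  # "^COAL_.*$"
print(processes.filter(regex=regexp, axis="index"))  # returns both rows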
@@ -1846,70 +1846,67 @@ def get_matching_commodities(row, dictionary):
     ]:
         if row[col] is not None:
             matching_commodities = intersect(
-                matching_commodities, filter_by_pattern(dictionary[key], row[col])
+                matching_commodities,
+                filter_by_pattern(dictionary[key], row[col].upper()),
             )
     return matching_commodities
 
 
+def df_indexed_by_col(df, col):
+    # Set df index using an existing column; make the index uppercase
+    df = df.dropna().drop_duplicates()
+    index = df[col].str.upper()
+    df = df.set_index(index).rename_axis("index")
+
+    if len(df.columns) > 1:
+        df = df.drop(columns=col)
+    return df
+
+
 def generate_topology_dictionary(
     tables: Dict[str, DataFrame], model: datatypes.TimesModel
 ) -> Dict[str, DataFrame]:
     # We need to be able to fetch processes based on any combination of name, description, set, comm-in, or comm-out
     # So we construct tables whose indices are names, etc. and use pd.filter
 
     dictionary = dict()
+    pros = model.processes
+    coms = model.commodities
+    pros_and_coms = tables[datatypes.Tag.fi_t]
 
-    dictionary["processes_by_name"] = (
-        model.processes[["process"]]
-        .dropna()
-        .drop_duplicates()
-        .set_index("process", drop=False)
-        .rename_axis("index")
-    )
-    dictionary["processes_by_desc"] = (
-        model.processes[["process", "techdesc"]]
-        .dropna()
-        .drop_duplicates()
-        .set_index("techdesc")
-    )
-    dictionary["processes_by_sets"] = (
-        model.processes[["process", "sets"]]
-        .dropna()
-        .drop_duplicates()
-        .set_index("sets")
-    )
-    processes_and_commodities = tables[datatypes.Tag.fi_t]
-    dictionary["processes_by_comm_in"] = (
-        processes_and_commodities[["process", "commodity-in"]]
-        .dropna()
-        .drop_duplicates()
-        .set_index("commodity-in")
-    )
-    dictionary["processes_by_comm_out"] = (
-        processes_and_commodities[["process", "commodity-out"]]
-        .dropna()
-        .drop_duplicates()
-        .set_index("commodity-out")
-    )
-    dictionary["commodities_by_name"] = (
-        model.commodities[["commodity"]]
-        .dropna()
-        .drop_duplicates()
-        .set_index("commodity", drop=False)
-        .rename_axis("index")
-    )
-    dictionary["commodities_by_desc"] = (
-        model.commodities[["commodity", "commdesc"]]
-        .dropna()
-        .drop_duplicates()
-        .set_index("commdesc")
-    )
-    dictionary["commodities_by_sets"] = (
-        model.commodities[["commodity", "csets"]]
-        .dropna()
-        .drop_duplicates()
-        .set_index("csets")
-    )
+    dict_info = [
+        {"key": "processes_by_name", "df": pros[["process"]], "col": "process"},
+        {
+            "key": "processes_by_desc",
+            "df": pros[["process", "description"]],
+            "col": "description",
+        },
+        {"key": "processes_by_sets", "df": pros[["process", "sets"]], "col": "sets"},
+        {
+            "key": "processes_by_comm_in",
+            "df": pros_and_coms[["process", "commodity-in"]],
+            "col": "commodity-in",
+        },
+        {
+            "key": "processes_by_comm_out",
+            "df": pros_and_coms[["process", "commodity-out"]],
+            "col": "commodity-out",
+        },
+        {"key": "commodities_by_name", "df": coms[["commodity"]], "col": "commodity"},
+        {
+            "key": "commodities_by_desc",
+            "df": coms[["commodity", "description"]],
+            "col": "description",
+        },
+        {
+            "key": "commodities_by_sets",
+            "df": coms[["commodity", "csets"]],
+            "col": "csets",
+        },
+    ]
+
+    for entry in dict_info:
+        dictionary[entry["key"]] = df_indexed_by_col(entry["df"], entry["col"])
 
     return dictionary

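Note: the refactor above collapses eight near-identical construction blocks into one helper plus a data-driven loop, uppercasing every index on the way in. A toy example of what df_indexed_by_col produces; the frame below is invented for illustration:

import pandas as pd
from xl2times.transforms import df_indexed_by_col  # the helper added above

df = pd.DataFrame(
    {"process": ["coal_pp", "gas_pp"], "description": ["Coal plant", "Gas plant"]}
)
print(df_indexed_by_col(df, "description"))
#             process
# index
# COAL PLANT  coal_pp
# GAS PLANT    gas_pp

A single-column frame (e.g. the one behind processes_by_name) keeps its only column, since the helper drops the index column only when other columns remain.
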
11 changes: 3 additions & 8 deletions xl2times/utils.py
@@ -186,22 +186,17 @@ def create_regexp(pattern):
     pattern = remove_negative_patterns(pattern)
     if len(pattern) == 0:
         return re.compile(pattern)  # matches everything
-    # escape special characters
-    # Backslash must come first
-    special = "\\.|^$+()[]{}"
-    for c in special:
-        pattern = pattern.replace(c, "\\" + c)
     # Handle VEDA wildcards
-    pattern = pattern.replace("*", ".*").replace("?", ".").replace(",", "|")
+    pattern = pattern.replace("*", ".*").replace("?", ".").replace(",", r"$|^")
     # Do not match substrings
-    pattern = "^" + pattern + "$"
+    pattern = rf"^{pattern}$"
     return re.compile(pattern)
 
 
 def create_negative_regexp(pattern):
     pattern = remove_positive_patterns(pattern)
     if len(pattern) == 0:
-        pattern = "^$"  # matches nothing
+        pattern = r"^$"  # matches nothing
     return create_regexp(pattern)


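Note on the comma handling in create_regexp: commas separate alternates in VEDA patterns, and "|" has the lowest precedence in a regex, so with the old "," -> "|" replacement the leading "^" bound only the first alternate and the trailing "$" only the last; other alternates could match substrings under the re.search semantics applied downstream (e.g. DataFrame.filter). Replacing "," with r"$|^" anchors every alternate individually. A small self-contained check, with patterns invented for illustration:

import re

def to_regexp(pattern, comma_repl):
    # Mimics create_regexp's wildcard translation with a pluggable comma replacement
    pattern = pattern.replace("*", ".*").replace("?", ".").replace(",", comma_repl)
    return re.compile("^" + pattern + "$")

old = to_regexp("COAL*,GAS*", "|")     # ^COAL.*|GAS.*$  -- '|' splits the anchors
new = to_regexp("COAL*,GAS*", r"$|^")  # ^COAL.*$|^GAS.*$ -- each alternate anchored

print(bool(old.search("XXGASXX")))   # True: 'GAS.*$' matches mid-string
print(bool(new.search("XXGASXX")))   # False
print(bool(new.search("GAS_CCGT")))  # True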