
Commit

Fix negative production in dataset (#101)
rangoiv authored Apr 4, 2024
1 parent e91948c commit a745270
Showing 1 changed file with 44 additions and 30 deletions.
dataset/create_dataset.py (74 changes: 44 additions & 30 deletions)
@@ -23,7 +23,9 @@
df = df.rename(columns={"Precipitable Water": "Rain"})
df = df.rename(columns={"Wind Speed": "Wind"})
df = df.rename(columns={"Temperature": "Temp"})

df["Wind"] = df["Wind"].rolling(3).mean()
df["Wind"] = df["Wind"].rolling(3).mean()
df["UV"] = df["UV"].rolling(3).mean()

# load energy data
energy = pd.read_csv("data/DUQ_hourly.csv")
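
For context, a tiny standalone sketch, assuming pandas, of what the rolling(3).mean() smoothing above does to a column: the first window - 1 entries come out as NaN, which fits with the later changes that work on offset slices of the frame. The sample values are hypothetical.

import pandas as pd

# Hypothetical wind-speed values; .rolling(3).mean() averages each value with
# the two before it, so the first two entries have no full window and are NaN.
wind = pd.Series([4.0, 6.0, 8.0, 10.0, 12.0])
print(wind.rolling(3).mean().tolist())
# [nan, nan, 6.0, 8.0, 10.0]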
@@ -75,12 +77,19 @@

# Prepare data for csv -> these go directly to game and sql
def prepare_chunk(df: pd.DataFrame) -> pd.DataFrame:
def mavg_noise(box_size, size):
noise = np.random.normal(0, 1, size + box_size - 1)
return np.convolve(np.ones(box_size) / box_size, noise, mode="valid")
def from_0_to_1(col):
return (col - col.min()) / (col.max() - col.min())

def mavg_noise(size):
box_size_1 = 128
box_size_2 = 32
noise = np.random.uniform(0, 1, size + box_size_1 + box_size_2 - 2)
noise = np.convolve(np.ones(box_size_1) / box_size_1, noise, mode="valid")
noise = np.convolve(np.ones(box_size_2) / box_size_2, noise, mode="valid")
return 2*from_0_to_1(noise)-1

def norm_col(col):
return col / col[:1800].mean()
return col / col[50:1850].mean()

def add_linear(col):
end = 0.12 * len(col) / 1800
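
For context on the commit title: the new mavg_noise smooths uniform noise with two box filters and rescales it into [-1, 1], so the fuel multipliers built later as 1 + mavg_noise(len(df)) * 0.03 stay inside [0.97, 1.03] and can no longer go negative. A minimal sketch reproducing those helpers outside the script; the length n = 2000 is an arbitrary stand-in for len(df).

import numpy as np

def from_0_to_1(col):
    # Linear rescale of an array into [0, 1].
    return (col - col.min()) / (col.max() - col.min())

def mavg_noise(size):
    # Two box filters over uniform noise, then rescale into [-1, 1].
    box_size_1 = 128
    box_size_2 = 32
    noise = np.random.uniform(0, 1, size + box_size_1 + box_size_2 - 2)
    noise = np.convolve(np.ones(box_size_1) / box_size_1, noise, mode="valid")
    noise = np.convolve(np.ones(box_size_2) / box_size_2, noise, mode="valid")
    return 2 * from_0_to_1(noise) - 1

n = 2000                                # stand-in for len(df)
fuel = 1 + mavg_noise(n) * 0.03         # as assigned to COAL, URANIUM, ... below
assert len(fuel) == n
assert 0.97 <= fuel.min() and fuel.max() <= 1.03   # strictly positive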
@@ -108,11 +117,11 @@ def add_linear(col):
)
new_df["date"] = df.index.to_series()

new_df["COAL"] = mavg_noise(24, len(df)) * 0.02
new_df["URANIUM"] = mavg_noise(24, len(df)) * 0.02
new_df["BIOMASS"] = mavg_noise(24, len(df)) * 0.02
new_df["GAS"] = mavg_noise(24, len(df)) * 0.02
new_df["OIL"] = mavg_noise(24, len(df)) * 0.02
new_df["COAL"] = 1 + mavg_noise(len(df)) * 0.03
new_df["URANIUM"] = 1 + mavg_noise(len(df)) * 0.03
new_df["BIOMASS"] = 1 + mavg_noise(len(df)) * 0.03
new_df["GAS"] = 1 + mavg_noise(len(df)) * 0.03
new_df["OIL"] = 1 + mavg_noise(len(df)) * 0.03

new_df["GEOTHERMAL"] = df["Rain"]
new_df["WIND"] = df["Wind"]
@@ -138,37 +147,39 @@ def add_linear(col):
new_df[col] = new_df[col].apply(lambda x: int(x))
new_df[col] = new_df[col].astype(int)

l = []
for col in new_df.columns:
if col == "date":
continue

# plt.plot(new_df[col], label=col)
l.append(new_df[col].sum())

# plt.title("Dataset outputs")
# plt.title("Power plants outputs")
# graph_df = new_df.iloc[:1800, :]
# plt.plot(graph_df["COAL"], label="COAL")
# plt.plot(graph_df["URANIUM"], label="URANIUM")
# plt.plot(graph_df["BIOMASS"], label="BIO")
# plt.plot(graph_df["GAS"], label="GAS")
# plt.plot(graph_df["OIL"], label="OIL")
# plt.plot(graph_df["GEOTHERMAL"], label="GEOTHERMAL")
# plt.plot(graph_df["WIND"], label="WIND")
# plt.plot(graph_df["SOLAR"], label="SOLAR")
# plt.plot(graph_df["HYDRO"], label="HYDRO")
# plt.legend()
# plt.show()

# plt.bar(new_df.columns[1:], l)
# plt.title("Dataset outputs sum")

# plt.title("Market prices")
# graph_df = new_df.iloc[50:1850:3, :]
# plt.plot(graph_df["COAL_PRICE"], label="COAL")
# plt.plot(graph_df["URANIUM_PRICE"], label="URANIUM")
# plt.plot(graph_df["BIOMASS_PRICE"], label="BIO")
# plt.plot(graph_df["GAS_PRICE"], label="GAS")
# plt.plot(graph_df["OIL_PRICE"], label="OIL")
# plt.legend()
# plt.show()
graph_df = new_df.iloc[:1800:3, :]
plt.plot(graph_df["COAL_PRICE"], label="COAL")
plt.plot(graph_df["URANIUM_PRICE"], label="URANIUM")
plt.plot(graph_df["BIOMASS_PRICE"], label="BIO")
plt.plot(graph_df["GAS_PRICE"], label="GAS")
plt.plot(graph_df["OIL_PRICE"], label="OIL")
plt.legend()
plt.show()
# plt.show()

# plt.title("Energy prices and demand")
# plt.plot(new_df["ENERGY_DEMAND"][:1800])
# plt.plot(new_df["MAX_ENERGY_PRICE"])
# plt.title("PRICES")
# plt.show()

return new_df
return new_df.iloc[50:, :]


for name, group in groups:
Expand All @@ -186,6 +197,9 @@ def add_linear(col):
assert all(time_delta == pd.Timedelta("60 minutes"))

group = prepare_chunk(group)
# print(group.iloc[:1800, :].min())
# print(group.iloc[:1800, :].max())
# print(group.iloc[:1800, :].mean())

group.to_csv(
f"chunks/df_{len(group)}_{group.index[0]}_{group.index[-1]}.csv", index=False
