Skip to content

Commit

Permalink
lint
Browse files (browse the repository at this point in the history)
  • Loading branch information
MarcoGorelli committed Sep 4, 2024
1 parent bab1673 commit df2ccb5
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 15 deletions.
20 changes: 10 additions & 10 deletions m5-forecasting-feature-engineering/pandas_queries.py
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
import os
import time
from pathlib import Path

import numpy as np
import pandas as pd
Expand All @@ -20,20 +20,20 @@
# Set this to True if you just want to test that everything runs
SMALL = True
if SMALL:
PATH = os.path.join(PROCESSED_DATA_DIR, "grid_part_1_small.parquet")
PATH = Path(PROCESSED_DATA_DIR) / "grid_part_1_small.parquet"
else:
PATH = os.path.join(PROCESSED_DATA_DIR, "grid_part_1.parquet")
PATH = Path(PROCESSED_DATA_DIR) / "grid_part_1.parquet"

LAG_DAYS = [col for col in range(SHIFT_DAY, SHIFT_DAY + 15)]
LAG_DAYS = list(range(SHIFT_DAY, SHIFT_DAY + 15))


def q1_pandas(df):
return df.assign(
**{
f"{TARGET}_lag_{l}": df.groupby(["id"], observed=True)[TARGET].transform(
lambda x: x.shift(l)
f"{TARGET}_lag_{lag}": df.groupby(["id"], observed=True)[TARGET].transform(
lambda x: x.shift(lag) # noqa: B023
)
for l in LAG_DAYS
for lag in LAG_DAYS
}
)

Expand All @@ -42,11 +42,11 @@ def q2_pandas(df):
for i in [7, 14, 30, 60, 180]:
df["rolling_mean_" + str(i)] = df.groupby(["id"], observed=True)[
TARGET
].transform(lambda x: x.shift(SHIFT_DAY).rolling(i).mean())
].transform(lambda x: x.shift(SHIFT_DAY).rolling(i).mean()) # noqa: B023
for i in [7, 14, 30, 60, 180]:
df["rolling_std_" + str(i)] = df.groupby(["id"], observed=True)[
TARGET
].transform(lambda x: x.shift(SHIFT_DAY).rolling(i).std())
].transform(lambda x: x.shift(SHIFT_DAY).rolling(i).std()) # noqa: B023
return df


Expand All @@ -55,7 +55,7 @@ def q3_pandas(df):
for d_window in [7, 14, 30, 60]:
col_name = "rolling_mean_" + str(d_shift) + "_" + str(d_window)
df[col_name] = df.groupby(["id"], observed=True)[TARGET].transform(
lambda x: x.shift(d_shift).rolling(d_window).mean()
lambda x: x.shift(d_shift).rolling(d_window).mean() # noqa: B023
)
return df

Expand Down
12 changes: 7 additions & 5 deletions m5-forecasting-feature-engineering/polars_queries.py
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
import os
import time
from pathlib import Path

import polars as pl

Expand All @@ -10,19 +10,21 @@
TARGET = "sales"
SHIFT_DAY = 28


# Set this to True if you just want to test that everything runs
SMALL = True
if SMALL:
PATH = os.path.join(PROCESSED_DATA_DIR, "grid_part_1_small.parquet")
PATH = Path(PROCESSED_DATA_DIR) / "grid_part_1_small.parquet"
else:
PATH = os.path.join(PROCESSED_DATA_DIR, "grid_part_1.parquet")
PATH = Path(PROCESSED_DATA_DIR) / "grid_part_1.parquet"

LAG_DAYS = [col for col in range(SHIFT_DAY, SHIFT_DAY + 15)]
LAG_DAYS = list(range(SHIFT_DAY, SHIFT_DAY + 15))


def q1_polars(df):
return df.with_columns(
pl.col(TARGET).shift(l).over("id").alias(f"{TARGET}_lag_{l}") for l in LAG_DAYS
pl.col(TARGET).shift(lag).over("id").alias(f"{TARGET}_lag_{lag}")
for lag in LAG_DAYS
)


Expand Down

0 comments on commit df2ccb5

Please sign in to comment.