Skip to content

Commit

Permalink
FIX-#2456: update taxi queries with .copy usage (#2457)
Browse files Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev authored Nov 20, 2020
1 parent 03dbbef commit 80125c1
Showing 1 changed file with 8 additions and 13 deletions.
21 changes: 8 additions & 13 deletions examples/docker/nyc-taxi.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,14 @@ def q2(df):
return df.groupby("passenger_count", as_index=False).mean()[["passenger_count", "total_amount"]]

def q3(df):
-    transformed = pd.DataFrame({
-        "passenger_count": df["passenger_count"],
-        "pickup_datetime": df["pickup_datetime"].dt.year,
-    })
-    return transformed.groupby(["pickup_datetime", "passenger_count"]).agg({"passenger_count": ["count"]})
+    df["pickup_datetime"] = df["pickup_datetime"].dt.year
+    return df.groupby(["pickup_datetime", "passenger_count"]).size().reset_index()


def q4(df):
-    transformed = pd.DataFrame({
-        "passenger_count": df["passenger_count"],
-        "pickup_datetime": df["pickup_datetime"].dt.year,
-        "trip_distance": df["trip_distance"].astype("int64"),
-    })
-    return transformed.groupby(["passenger_count", "pickup_datetime", "trip_distance"]) \
+    df["pickup_datetime"] = df["pickup_datetime"].dt.year
+    df["trip_distance"] = df["trip_distance"].astype("int64")
+    return df.groupby(["passenger_count", "pickup_datetime", "trip_distance"]) \
         .size().reset_index().sort_values(by=["pickup_datetime", 0], ascending=[True, False])

def measure(name, func, *args, **kw):
Expand All @@ -66,8 +61,8 @@ def main():
df = measure('Reading', read)
measure('Q1', q1, df)
measure('Q2', q2, df)
-    measure('Q3', q3, df)
-    measure('Q4', q4, df)
+    measure('Q3', q3, df.copy())
+    measure('Q4', q4, df.copy())

if __name__ == '__main__':
main()
Expand Down

0 comments on commit 80125c1

Please sign in to comment.