diff --git a/polars_queries/q2.py b/polars_queries/q2.py
index 5b13590..b58fefb 100644
--- a/polars_queries/q2.py
+++ b/polars_queries/q2.py
@@ -53,7 +53,7 @@ def q():
             reverse=[True, False, False, False],
         )
         .limit(100)
-        .with_column(pl.col(pl.datatypes.Utf8).str.strip().keep_name())
+        .with_columns(pl.col(pl.datatypes.Utf8).str.strip().keep_name())
     )
 
     utils.run_query(Q_NUM, q_final)
diff --git a/polars_queries/q3.py b/polars_queries/q3.py
index 336804a..8843e1a 100644
--- a/polars_queries/q3.py
+++ b/polars_queries/q3.py
@@ -21,7 +21,7 @@ def q():
         .join(line_item_ds, left_on="o_orderkey", right_on="l_orderkey")
         .filter(pl.col("o_orderdate") < var2)
         .filter(pl.col("l_shipdate") > var1)
-        .with_column(
+        .with_columns(
             (pl.col("l_extendedprice") * (1 - pl.col("l_discount"))).alias("revenue")
         )
         .groupby(["o_orderkey", "o_orderdate", "o_shippriority"])
diff --git a/polars_queries/q4.py b/polars_queries/q4.py
index 96b8d10..00a64fd 100644
--- a/polars_queries/q4.py
+++ b/polars_queries/q4.py
@@ -23,7 +23,7 @@ def q():
         .groupby("o_orderpriority")
         .agg(pl.count().alias("order_count"))
         .sort(by="o_orderpriority")
-        .with_column(pl.col("order_count").cast(pl.datatypes.Int64))
+        .with_columns(pl.col("order_count").cast(pl.datatypes.Int64))
     )
 
     utils.run_query(Q_NUM, q_final)
diff --git a/polars_queries/q5.py b/polars_queries/q5.py
index fa8e1ca..0ea3904 100644
--- a/polars_queries/q5.py
+++ b/polars_queries/q5.py
@@ -32,7 +32,7 @@ def q():
         .filter(pl.col("r_name") == var1)
         .filter(pl.col("o_orderdate") >= var2)
         .filter(pl.col("o_orderdate") < var3)
-        .with_column(
+        .with_columns(
             (pl.col("l_extendedprice") * (1 - pl.col("l_discount"))).alias("revenue")
         )
         .groupby("n_name")
diff --git a/polars_queries/q7.py b/polars_queries/q7.py
index 4ca74e5..33a2df0 100644
--- a/polars_queries/q7.py
+++ b/polars_queries/q7.py
@@ -41,10 +41,10 @@ def q():
         pl.concat([df1, df2])
         .filter(pl.col("l_shipdate") >= datetime(1995, 1, 1))
         .filter(pl.col("l_shipdate") <= datetime(1996, 12, 31))
-        .with_column(
+        .with_columns(
            (pl.col("l_extendedprice") * (1 - pl.col("l_discount"))).alias("volume")
         )
-        .with_column(pl.col("l_shipdate").dt.year().alias("l_year"))
+        .with_columns(pl.col("l_shipdate").dt.year().alias("l_year"))
         .groupby(["supp_nation", "cust_nation", "l_year"])
         .agg([pl.sum("volume").alias("revenue")])
         .sort(by=["supp_nation", "cust_nation", "l_year"])
diff --git a/polars_queries/utils.py b/polars_queries/utils.py
index 89d5ee6..a74349b 100644
--- a/polars_queries/utils.py
+++ b/polars_queries/utils.py
@@ -34,7 +34,7 @@ def _scan_ds(path: str):
 
 def get_query_answer(query: int, base_dir: str = ANSWERS_BASE_DIR) -> pl.LazyFrame:
     answer_ldf = pl.scan_csv(
-        join(base_dir, f"q{query}.out"), sep="|", has_header=True, parse_dates=True
+        join(base_dir, f"q{query}.out"), separator="|", has_header=True, try_parse_dates=True
     )
     cols = answer_ldf.columns
     answer_ldf = answer_ldf.select(
diff --git a/prepare_files.py b/prepare_files.py
index da334b1..bb0f32e 100644
--- a/prepare_files.py
+++ b/prepare_files.py
@@ -103,8 +103,8 @@
     df = pl.read_csv(
         f"tables_scale_{scale_fac}/{name}.tbl",
         has_header=False,
-        sep="|",
-        parse_dates=True,
+        separator="|",
+        try_parse_dates=True,
         new_columns=eval(f"h_{name}"),
     )
     print(df.shape)
diff --git a/prepare_large_files.py b/prepare_large_files.py
index 65e602c..c1fef76 100644
--- a/prepare_large_files.py
+++ b/prepare_large_files.py
@@ -111,8 +111,8 @@
     df = pl.scan_csv(
         f"tables_scale_{scale_fac}/{name}.tbl",
         has_header=False,
-        sep="|",
-        parse_dates=True,
+        separator="|",
+        try_parse_dates=True,
         with_column_names=lambda _: eval(f"h_{name}")
     )
 
diff --git a/scripts/plot_results.py b/scripts/plot_results.py
index 3988380..bcfc394 100644
--- a/scripts/plot_results.py
+++ b/scripts/plot_results.py
@@ -50,7 +50,7 @@ def add_annotations(fig, limit: int, df: pl.DataFrame):
     # and create a text label for them
     df = (
         df.filter(pl.col("duration[s]") > limit)
-        .with_column(
+        .with_columns(
             pl.when(pl.col("success"))
             .then(
                 pl.format(
@@ -62,7 +62,7 @@
         .join(bar_order, on="solution")
         .groupby("query_no")
         .agg([pl.col("labels").list(), pl.col("index").min()])
-        .with_column(pl.col("labels").arr.join(",\n"))
+        .with_columns(pl.col("labels").arr.join(",\n"))
     )
 
     # then we create a dictionary similar to something like this: