From 2a0e473c0c1afa1bf9e6b28d996e9f91f83550e9 Mon Sep 17 00:00:00 2001 From: Billy Lanchantin Date: Thu, 28 Nov 2024 15:51:34 -0500 Subject: [PATCH] Unskip DataFrame serialization properties (#1028) * Allow specifying the dtype generator directly Before we could only specify the leaf part of the dtype tree generator. This change lets us alternately specify the whole thing. * Remove all `:skip` tags If we exclude `:category`, we can avoid #1011. Note: there may still be issues with `:category` but we won't be able to find them until #1011 is resolved. --- test/explorer/data_frame_test.exs | 29 +++++++++++++++++------------ test/support/generator.ex | 6 ++++-- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/test/explorer/data_frame_test.exs b/test/explorer/data_frame_test.exs index 8fdf9813f..c67e5bdf1 100644 --- a/test/explorer/data_frame_test.exs +++ b/test/explorer/data_frame_test.exs @@ -4729,8 +4729,7 @@ defmodule Explorer.DataFrameTest do # These property tests are a work in progress. They currently aim to cover # creation and serialization (including printing). Serialization in particular - # is causing lots of panics. The plan is to keep the properties that don't - # pass but with `@tag :skip` until we can fix them. + # is causing lots of panics. Issue #1011 currently blocks a few. # # Notes: # @@ -4778,10 +4777,14 @@ defmodule Explorer.DataFrameTest do end end - @tag :skip - property "can dump any DataFrame (without duration) to CSV" do + property "can dump non-composite, non-binary, non-duration DataFrame to CSV" do + not_yet_implemented = [:binary, :duration] + check all( - dtypes <- Explorer.Generator.dtypes(exclude: :duration), + dtypes <- + Explorer.Generator.dtypes( + dtype: Explorer.Generator.scalar_dtype(exclude: not_yet_implemented) + ), rows <- Explorer.Generator.rows(dtypes), max_runs: 1_000 ) do @@ -4791,10 +4794,10 @@ defmodule Explorer.DataFrameTest do end end - @tag :skip property "can dump any DataFrame to IPC" do check all( - dtypes <- Explorer.Generator.dtypes(), + # TODO: Remove `exclude: :category` after #1011 is resolved. + dtypes <- Explorer.Generator.dtypes(exclude: :category), rows <- Explorer.Generator.rows(dtypes), max_runs: 1_000 ) do @@ -4804,10 +4807,12 @@ defmodule Explorer.DataFrameTest do end end - @tag :skip - property "can dump any DataFrame to NDJSON" do + property "can dump any non-binary, non-time DataFrame to NDJSON" do + not_yet_implemented = [:binary, :time] + check all( - dtypes <- Explorer.Generator.dtypes(), + # TODO: Remove `exclude: :category` after #1011 is resolved. + dtypes <- Explorer.Generator.dtypes(exclude: [:category | not_yet_implemented]), rows <- Explorer.Generator.rows(dtypes), max_runs: 1_000 ) do @@ -4817,10 +4822,10 @@ defmodule Explorer.DataFrameTest do end end - @tag :skip property "can dump any DataFrame to PARQUET" do check all( - dtypes <- Explorer.Generator.dtypes(), + # TODO: Remove `exclude: :category` after #1011 is resolved. + dtypes <- Explorer.Generator.dtypes(exclude: :category), rows <- Explorer.Generator.rows(dtypes), max_runs: 1_000 ) do diff --git a/test/support/generator.ex b/test/support/generator.ex index 1305ef443..235baee09 100644 --- a/test/support/generator.ex +++ b/test/support/generator.ex @@ -190,13 +190,15 @@ defmodule Explorer.Generator do def dtypes(opts \\ []) do {list_of_opts, dtype_opts} = Keyword.split(opts, [:min_length, :max_length, :length]) + dtypes = dtype_opts[:dtype] || dtype(Keyword.delete(dtype_opts, :dtype)) + list_of_opts = list_of_opts |> Keyword.put_new(:min_length, 1) |> Keyword.put_new(:max_length, 2) |> Keyword.put(:uniq_fun, &elem(&1, 0)) - uniq_list_of(tuple({column_name(), dtype(dtype_opts)}), list_of_opts) + uniq_list_of(tuple({column_name(), dtypes}), list_of_opts) end # For clarity, column names and field names are built from different halves of @@ -286,7 +288,7 @@ defmodule Explorer.Generator do end @spec scalar_dtype(keyword()) :: gen(dtype()) - defp scalar_dtype(opts) do + def scalar_dtype(opts) do scalars_by_alias = %{ binary: constant(:binary),