From 2a0e473c0c1afa1bf9e6b28d996e9f91f83550e9 Mon Sep 17 00:00:00 2001
From: Billy Lanchantin <william.lanchantin@cargosense.com>
Date: Thu, 28 Nov 2024 15:51:34 -0500
Subject: [PATCH] Unskip DataFrame serialization properties (#1028)

* Allow specifying the dtype generator directly

Before, we could only specify the leaf part of the
dtype tree generator. This change lets us
alternatively specify the whole thing.

* Remove all `:skip` tags

If we exclude `:category`, we can avoid #1011.
Note: there may still be issues with `:category`,
but we won't be able to find them until #1011 is
resolved.
---
 test/explorer/data_frame_test.exs | 29 +++++++++++++++++------------
 test/support/generator.ex         |  6 ++++--
 2 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/test/explorer/data_frame_test.exs b/test/explorer/data_frame_test.exs
index 8fdf9813f..c67e5bdf1 100644
--- a/test/explorer/data_frame_test.exs
+++ b/test/explorer/data_frame_test.exs
@@ -4729,8 +4729,7 @@ defmodule Explorer.DataFrameTest do
 
   # These property tests are a work in progress. They currently aim to cover
   # creation and serialization (including printing). Serialization in particular
-  # is causing lots of panics. The plan is to keep the properties that don't
-  # pass but with `@tag :skip` until we can fix them.
+  # is causing lots of panics. Issue #1011 currently blocks a few.
   #
   # Notes:
   #
@@ -4778,10 +4777,14 @@ defmodule Explorer.DataFrameTest do
       end
     end
 
-    @tag :skip
-    property "can dump any DataFrame (without duration) to CSV" do
+    property "can dump non-composite, non-binary, non-duration DataFrame to CSV" do
+      not_yet_implemented = [:binary, :duration]
+
       check all(
-              dtypes <- Explorer.Generator.dtypes(exclude: :duration),
+              dtypes <-
+                Explorer.Generator.dtypes(
+                  dtype: Explorer.Generator.scalar_dtype(exclude: not_yet_implemented)
+                ),
               rows <- Explorer.Generator.rows(dtypes),
               max_runs: 1_000
             ) do
@@ -4791,10 +4794,10 @@ defmodule Explorer.DataFrameTest do
       end
     end
 
-    @tag :skip
     property "can dump any DataFrame to IPC" do
       check all(
-              dtypes <- Explorer.Generator.dtypes(),
+              # TODO: Remove `exclude: :category` after #1011 is resolved.
+              dtypes <- Explorer.Generator.dtypes(exclude: :category),
               rows <- Explorer.Generator.rows(dtypes),
               max_runs: 1_000
             ) do
@@ -4804,10 +4807,12 @@ defmodule Explorer.DataFrameTest do
       end
     end
 
-    @tag :skip
-    property "can dump any DataFrame to NDJSON" do
+    property "can dump any non-binary, non-time DataFrame to NDJSON" do
+      not_yet_implemented = [:binary, :time]
+
       check all(
-              dtypes <- Explorer.Generator.dtypes(),
+              # TODO: Remove `exclude: :category` after #1011 is resolved.
+              dtypes <- Explorer.Generator.dtypes(exclude: [:category | not_yet_implemented]),
               rows <- Explorer.Generator.rows(dtypes),
               max_runs: 1_000
             ) do
@@ -4817,10 +4822,10 @@ defmodule Explorer.DataFrameTest do
       end
     end
 
-    @tag :skip
     property "can dump any DataFrame to PARQUET" do
       check all(
-              dtypes <- Explorer.Generator.dtypes(),
+              # TODO: Remove `exclude: :category` after #1011 is resolved.
+              dtypes <- Explorer.Generator.dtypes(exclude: :category),
               rows <- Explorer.Generator.rows(dtypes),
               max_runs: 1_000
             ) do
diff --git a/test/support/generator.ex b/test/support/generator.ex
index 1305ef443..235baee09 100644
--- a/test/support/generator.ex
+++ b/test/support/generator.ex
@@ -190,13 +190,15 @@ defmodule Explorer.Generator do
   def dtypes(opts \\ []) do
     {list_of_opts, dtype_opts} = Keyword.split(opts, [:min_length, :max_length, :length])
 
+    dtypes = dtype_opts[:dtype] || dtype(Keyword.delete(dtype_opts, :dtype))
+
     list_of_opts =
       list_of_opts
       |> Keyword.put_new(:min_length, 1)
       |> Keyword.put_new(:max_length, 2)
       |> Keyword.put(:uniq_fun, &elem(&1, 0))
 
-    uniq_list_of(tuple({column_name(), dtype(dtype_opts)}), list_of_opts)
+    uniq_list_of(tuple({column_name(), dtypes}), list_of_opts)
   end
 
   # For clarity, column names and field names are built from different halves of
@@ -286,7 +288,7 @@ defmodule Explorer.Generator do
   end
 
   @spec scalar_dtype(keyword()) :: gen(dtype())
-  defp scalar_dtype(opts) do
+  def scalar_dtype(opts) do
     scalars_by_alias =
       %{
         binary: constant(:binary),