Add unit tests for --preserve_input_sort_order (see #75)

bxparks · Nov 11, 2021 · 7673bb5 · 7673bb5
1 parent 02e09dd
commit 7673bb5
Show file tree

Hide file tree

Showing 2 changed files with 97 additions and 1 deletion.
diff --git a/tests/test_generate_schema.py b/tests/test_generate_schema.py
@@ -608,6 +608,7 @@ def verify_data_chunk_as_csv_json_dict(self, *, chunk, as_dict):
         quoted_values_are_strings = ('quoted_values_are_strings' in data_flags)
         sanitize_names = ('sanitize_names' in data_flags)
         ignore_invalid_lines = ('ignore_invalid_lines' in data_flags)
+        preserve_input_sort_order = ('preserve_input_sort_order' in data_flags)
         records = chunk['records']
         expected_errors = chunk['errors']
         expected_error_map = chunk['error_map']
@@ -638,7 +639,8 @@ def verify_data_chunk_as_csv_json_dict(self, *, chunk, as_dict):
             keep_nulls=keep_nulls,
             quoted_values_are_strings=quoted_values_are_strings,
             sanitize_names=sanitize_names,
-            ignore_invalid_lines=ignore_invalid_lines)
+            ignore_invalid_lines=ignore_invalid_lines,
+            preserve_input_sort_order=preserve_input_sort_order)
         existing_schema_map = None
         if existing_schema:
             existing_schema_map = bq_schema_to_map(json.loads(existing_schema))

diff --git a/tests/testdata.txt b/tests/testdata.txt
@@ -2158,3 +2158,97 @@ SCHEMA
   }
 ]
 END
+
+# Test --preserve_input_sort_order flag. Without the flag, the
+# keys are in sorted order, for compatibility with 'bq load --autodetect',
+# or at least with what 'bq load' used to do.
+# See https://github.com/bxparks/bigquery-schema-generator/pull/75
+DATA
+{ "s": "string", "i": 3, "x": 3.2, "b": true }
+SCHEMA
+[
+  {
+    "mode": "NULLABLE",
+    "name": "b",
+    "type": "BOOLEAN"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "i",
+    "type": "INTEGER"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "s",
+    "type": "STRING"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "x",
+    "type": "FLOAT"
+  }
+]
+END
+
+# Test --preserve_input_sort_order flag. With the flag, the column keys should
+# be in the order they appear in the JSON data.
+# See https://github.com/bxparks/bigquery-schema-generator/pull/75
+DATA preserve_input_sort_order
+{ "s": "string", "i": 3, "x": 3.2, "b": true }
+SCHEMA
+[
+  {
+    "mode": "NULLABLE",
+    "name": "s",
+    "type": "STRING"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "i",
+    "type": "INTEGER"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "x",
+    "type": "FLOAT"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "b",
+    "type": "BOOLEAN"
+  }
+]
+END
+
+# Test --preserve_input_sort_order flag. Each JSON data record can contain a
+# partial list of keys. So the order of columns in the schema will be the order
+# in which they are first *seen* by the bigquery_schema_generator.
+# See https://github.com/bxparks/bigquery-schema-generator/pull/75
+DATA preserve_input_sort_order
+{ "s": "string", "i": 3 }
+{ "x": 3.2, "s": "string", "i": 3 }
+{ "b": true, "x": 3.2, "s": "string", "i": 3 }
+SCHEMA
+[
+  {
+    "mode": "NULLABLE",
+    "name": "s",
+    "type": "STRING"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "i",
+    "type": "INTEGER"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "x",
+    "type": "FLOAT"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "b",
+    "type": "BOOLEAN"
+  }
+]
+END