Skip to content

Commit

Permalink
Add unit tests for --preserve_input_sort_order (see #75)
Browse files Browse the repository at this point in the history
  • Loading branch information
bxparks committed Nov 11, 2021
1 parent 02e09dd commit 7673bb5
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 1 deletion.
4 changes: 3 additions & 1 deletion tests/test_generate_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,7 @@ def verify_data_chunk_as_csv_json_dict(self, *, chunk, as_dict):
quoted_values_are_strings = ('quoted_values_are_strings' in data_flags)
sanitize_names = ('sanitize_names' in data_flags)
ignore_invalid_lines = ('ignore_invalid_lines' in data_flags)
preserve_input_sort_order = ('preserve_input_sort_order' in data_flags)
records = chunk['records']
expected_errors = chunk['errors']
expected_error_map = chunk['error_map']
Expand Down Expand Up @@ -638,7 +639,8 @@ def verify_data_chunk_as_csv_json_dict(self, *, chunk, as_dict):
keep_nulls=keep_nulls,
quoted_values_are_strings=quoted_values_are_strings,
sanitize_names=sanitize_names,
ignore_invalid_lines=ignore_invalid_lines)
ignore_invalid_lines=ignore_invalid_lines,
preserve_input_sort_order=preserve_input_sort_order)
existing_schema_map = None
if existing_schema:
existing_schema_map = bq_schema_to_map(json.loads(existing_schema))
Expand Down
94 changes: 94 additions & 0 deletions tests/testdata.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2158,3 +2158,97 @@ SCHEMA
}
]
END

# Test --preserve_input_sort_order flag. Without the flag, the keys are in
# sorted order, for compatibility with 'bq load --autodetect', or at least
# with what 'bq load' used to do.
# See https://github.com/bxparks/bigquery-schema-generator/pull/75
DATA
{ "s": "string", "i": 3, "x": 3.2, "b": true }
SCHEMA
[
{
"mode": "NULLABLE",
"name": "b",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "i",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "s",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "x",
"type": "FLOAT"
}
]
END

# Test --preserve_input_sort_order flag. With the flag, the column keys should
# be in the order they appear in the JSON data.
# See https://github.com/bxparks/bigquery-schema-generator/pull/75
DATA preserve_input_sort_order
{ "s": "string", "i": 3, "x": 3.2, "b": true }
SCHEMA
[
{
"mode": "NULLABLE",
"name": "s",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "i",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "x",
"type": "FLOAT"
},
{
"mode": "NULLABLE",
"name": "b",
"type": "BOOLEAN"
}
]
END

# Test --preserve_input_sort_order flag. Each JSON data record may contain only
# a subset of the keys, so the order of columns in the schema is the order
# in which they are first *seen* by the bigquery_schema_generator.
# See https://github.com/bxparks/bigquery-schema-generator/pull/75
DATA preserve_input_sort_order
{ "s": "string", "i": 3 }
{ "x": 3.2, "s": "string", "i": 3 }
{ "b": true, "x": 3.2, "s": "string", "i": 3 }
SCHEMA
[
{
"mode": "NULLABLE",
"name": "s",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "i",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "x",
"type": "FLOAT"
},
{
"mode": "NULLABLE",
"name": "b",
"type": "BOOLEAN"
}
]
END

0 comments on commit 7673bb5

Please sign in to comment.