Skip to content

Commit

Permalink
Merge pull request #75 from kdeggelman/keep-original-order
Browse files Browse the repository at this point in the history
Add option to preserve the original ordering of columns
  • Loading branch information
bxparks committed Nov 11, 2021
2 parents 4510130 + cb73385 commit 02e09dd
Showing 1 changed file with 13 additions and 1 deletion.
14 changes: 13 additions & 1 deletion bigquery_schema_generator/generate_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def __init__(
debugging_map=False,
sanitize_names=False,
ignore_invalid_lines=False,
preserve_input_sort_order=False,
):
self.input_format = input_format
self.infer_mode = infer_mode
Expand Down Expand Up @@ -113,7 +114,10 @@ def __init__(
# If CSV, preserve the original ordering because `bq load` matches the
# CSV column with the respective schema entry using the position of the
# column in the schema.
self.sorted_schema = (input_format in {'json', 'dict'})
self.sorted_schema = (
(input_format in {'json', 'dict'})
and not preserve_input_sort_order
)

self.line_number = 0
self.error_logs = []
Expand Down Expand Up @@ -1042,6 +1046,13 @@ def main():
' This can be fetched with:'
' `bq show --schema <project_id>:<dataset>:<table_name>',
default=None)
parser.add_argument(
'--preserve_input_sort_order',
help='Preserve the original ordering of columns from input instead of'
' sorting alphabetically.'
' This only impacts `input_format` of json or dict',
action='store_true'
)
args = parser.parse_args()

# Configure logging.
Expand All @@ -1056,6 +1067,7 @@ def main():
debugging_map=args.debugging_map,
sanitize_names=args.sanitize_names,
ignore_invalid_lines=args.ignore_invalid_lines,
preserve_input_sort_order=args.preserve_input_sort_order
)
existing_schema_map = read_existing_schema_from_file(
args.existing_schema_path)
Expand Down

0 comments on commit 02e09dd

Please sign in to comment.