-
Notifications
You must be signed in to change notification settings - Fork 14k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Add parquet upload #14449
feat: Add parquet upload #14449
Changes from all commits
4cc378a
c44fcee
968c048
c23b1ec
97bea75
2f8482b
77b381f
f8b0dfc
b40a3e2
f2911b5
dd889b0
cfdd3be
5e063bd
4c74594
9096faf
98c75ba
d92a290
3501fac
f209999
10d6229
35eaea6
10e825d
658656d
0f1816b
7b1b53b
b885b38
6bcc0d9
60ae1fe
5b5eb74
df2930f
9b4d35b
de3509e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
{# | ||
Licensed to the Apache Software Foundation (ASF) under one | ||
or more contributor license agreements. See the NOTICE file | ||
distributed with this work for additional information | ||
regarding copyright ownership. The ASF licenses this file | ||
to you under the Apache License, Version 2.0 (the | ||
"License"); you may not use this file except in compliance | ||
with the License. You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, | ||
software distributed under the License is distributed on an | ||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
KIND, either express or implied. See the License for the | ||
specific language governing permissions and limitations | ||
under the License. | ||
#} | ||
{% extends 'appbuilder/general/model/edit.html' %} | ||
|
||
{% block tail_js %} | ||
{{ super() }} | ||
<script> | ||
// jQuery handles for the "Database" selector and the "Schema" form field.
var db = $("#con");
var schema = $("#schema");

// The schema field is rendered as a plain text input. Keep a copy of it so
// the free-text input can be put back after it has been swapped for a
// dropdown (see change_schema_field_in_formview).
var any_schema_is_allowed = schema.clone();

// Refresh the schema field every time a different database is selected ...
db.on("change", function () {
  update_schemas_allowed_for_columnar_upload(db.val());
});

// ... and once up front for the database that is selected on page load.
update_schemas_allowed_for_columnar_upload(db.val());
|
||
/**
 * Ask the server which schemas of the given database accept file uploads
 * and update the schema form field accordingly.
 *
 * @param {string|number} db_id  Value of the database selector (#con).
 *
 * On success the parsed JSON response is handed to
 * change_schema_field_in_formview. On failure an alert is shown.
 */
function update_schemas_allowed_for_columnar_upload(db_id) {
  $.ajax({
    method: "GET",
    url: "/superset/schemas_access_for_file_upload",
    data: {db_id: db_id},
    dataType: 'json',
    contentType: "application/json; charset=utf-8"
  }).done(function(data) {
    change_schema_field_in_formview(data);
  }).fail(function(error) {
    // responseJSON is only set when the server answered with a JSON body.
    // Network failures or HTML error pages leave it undefined, so fall back
    // to the HTTP status text instead of throwing inside the handler.
    var payload = error && error.responseJSON;
    var errorMsg =
      (payload && payload.error) ||
      (error && error.statusText) ||
      "unknown error";
    alert("ERROR: " + errorMsg);
  });
}
|
||
/**
 * Swap the #schema form field between a dropdown and the free-text input.
 *
 * @param {Array<string>} schemas_allowed  Schema names returned by the
 *   server. A non-empty list renders a required <select> limited to those
 *   names; an empty or missing list restores the original text input
 *   (any_schema_is_allowed).
 */
function change_schema_field_in_formview(schemas_allowed){
  if (schemas_allowed && schemas_allowed.length > 0) {
    // Build the nodes through the DOM API rather than by concatenating
    // markup, so schema names containing quotes, "<" or "&" are treated as
    // data and can neither break the element nor inject HTML.
    var dropdown_schema_lists = $("<select>")
      .attr({id: "schema", name: "schema"})
      .prop("required", true);
    schemas_allowed.forEach(function(schema_allowed) {
      $("<option>")
        .attr("value", schema_allowed)
        .text(schema_allowed)
        .appendTo(dropdown_schema_lists);
    });
    $("#schema").replaceWith(dropdown_schema_lists);
  } else {
    $("#schema").replaceWith(any_schema_is_allowed);
  }
}
</script> | ||
{% endblock %} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,7 +21,13 @@ | |
from flask_appbuilder.forms import DynamicForm | ||
from flask_babel import lazy_gettext as _ | ||
from flask_wtf.file import FileAllowed, FileField, FileRequired | ||
from wtforms import BooleanField, IntegerField, SelectField, StringField | ||
from wtforms import ( | ||
BooleanField, | ||
IntegerField, | ||
MultipleFileField, | ||
SelectField, | ||
StringField, | ||
) | ||
from wtforms.ext.sqlalchemy.fields import QuerySelectField | ||
from wtforms.validators import DataRequired, Length, NumberRange, Optional | ||
|
||
|
@@ -163,6 +169,15 @@ def at_least_one_schema_is_allowed(database: Database) -> bool: | |
_("Mangle Duplicate Columns"), | ||
description=_('Specify duplicate columns as "X.0, X.1".'), | ||
) | ||
usecols = JsonListField( | ||
_("Use Columns"), | ||
default=None, | ||
description=_( | ||
"Json list of the column names that should be read. " | ||
"If not None, only these columns will be read from the file." | ||
), | ||
validators=[Optional()], | ||
) | ||
Comment on lines
+172
to
+180
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @villebro do you still want to keep the [`usecols` field]? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, let's keep this, I think it's a great addition 👍 |
||
skipinitialspace = BooleanField( | ||
_("Skip Initial Space"), description=_("Skip spaces after delimiter.") | ||
) | ||
|
@@ -402,3 +417,130 @@ def at_least_one_schema_is_allowed(database: Database) -> bool: | |
'Use [""] for empty string.' | ||
), | ||
) | ||
|
||
|
||
class ColumnarToDatabaseForm(DynamicForm): | ||
# pylint: disable=E0211 | ||
def columnar_allowed_dbs() -> List[Database]: # type: ignore | ||
Comment on lines
+423
to
+424
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wonder why this wasn't [a `@staticmethod`]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Adding that decorator will throw […] |
||
# TODO: change allow_csv_upload to allow_file_upload | ||
columnar_enabled_dbs = ( | ||
db.session.query(Database).filter_by(allow_csv_upload=True).all() | ||
) | ||
return [ | ||
columnar_enabled_db | ||
for columnar_enabled_db in columnar_enabled_dbs | ||
if ColumnarToDatabaseForm.at_least_one_schema_is_allowed( | ||
columnar_enabled_db | ||
) | ||
] | ||
Comment on lines
+422
to
+435
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There's unnecessary duplication here: we could abstract this into a […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I just opened #16046. Feel free to assign it to me. I'll try to open a PR for it sometime in the next week. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks - I assigned to you (will keep myself as assignee for review) |
||
|
||
@staticmethod
def at_least_one_schema_is_allowed(database: Database) -> bool:
    """
    Report whether the user may upload columnar data to ``database``.

    The check passes immediately when
    ``security_manager.can_access_database`` accepts the database.
    Otherwise the schemas returned by
    ``database.get_schema_access_for_csv_upload()`` are consulted, and the
    check passes only if that list is non-empty and
    ``security_manager.schemas_accessible_by_user`` returns a truthy
    result for it.

    Policy this implements:

    If the user has access to the database or to all datasources
        1. schemas_allowed_for_csv_upload is empty
            a) database does not support schemas:
               the user can upload without specifying a schema name
            b) database supports schemas:
               the user can upload to any schema
        2. schemas_allowed_for_csv_upload is not empty
            a) database does not support schemas:
               this situation is impossible and the upload will fail
            b) database supports schemas:
               the user can upload to a schema listed in
               schemas_allowed_for_csv_upload
    If the user does not have access to the database or to all datasources
        1. schemas_allowed_for_csv_upload is empty
            a) database does not support schemas:
               the user cannot upload
            b) database supports schemas:
               the user cannot upload
        2. schemas_allowed_for_csv_upload is not empty
            a) database does not support schemas:
               this situation is impossible and the user cannot upload
            b) database supports schemas:
               the user can upload to a schema listed in
               schemas_allowed_for_csv_upload
    """
    # Database-level access short-circuits the per-schema lookup.
    if security_manager.can_access_database(database):
        return True

    upload_schemas = database.get_schema_access_for_csv_upload()
    # An empty schema list means nothing is allowed for this user; the
    # ``and`` keeps the accessibility query from running in that case.
    return bool(
        upload_schemas
        and security_manager.schemas_accessible_by_user(
            database, upload_schemas, False
        )
    )
|
||
# Form fields of the columnar (e.g. Parquet) upload form. | ||
# Required name of the table that the uploaded data is written to. | ||
name = StringField( | ||
_("Table Name"), | ||
description=_("Name of table to be created from columnar data."), | ||
validators=[DataRequired()], | ||
widget=BS3TextFieldWidget(), | ||
) | ||
# One or more files may be selected (MultipleFileField). Accepted extensions | ||
# are the intersection of ALLOWED_EXTENSIONS and COLUMNAR_EXTENSIONS from config. | ||
# NOTE(review): FileAllowed was designed for single-file fields in older | ||
# Flask-WTF releases; confirm that it validates every file of a | ||
# MultipleFileField on the pinned Flask-WTF version. | ||
columnar_file = MultipleFileField( | ||
_("Columnar File"), | ||
description=_("Select a Columnar file to be uploaded to a database."), | ||
validators=[ | ||
DataRequired(), | ||
FileAllowed( | ||
config["ALLOWED_EXTENSIONS"].intersection( | ||
config["COLUMNAR_EXTENSIONS"] | ||
), | ||
_( | ||
"Only the following file extensions are allowed: " | ||
"%(allowed_extensions)s", | ||
allowed_extensions=", ".join( | ||
config["ALLOWED_EXTENSIONS"].intersection( | ||
config["COLUMNAR_EXTENSIONS"] | ||
) | ||
), | ||
), | ||
), | ||
], | ||
) | ||
|
||
# Target database; the choices are produced by columnar_allowed_dbs. | ||
con = QuerySelectField( | ||
_("Database"), | ||
query_factory=columnar_allowed_dbs, | ||
get_pk=lambda a: a.id, | ||
get_label=lambda a: a.database_name, | ||
) | ||
# Optional schema name, rendered as a text input. | ||
schema = StringField( | ||
_("Schema"), | ||
description=_("Specify a schema (if database flavor supports this)."), | ||
validators=[Optional()], | ||
widget=BS3TextFieldWidget(), | ||
) | ||
# Required choice of what to do when the target table already exists. | ||
if_exists = SelectField( | ||
_("Table Exists"), | ||
description=_( | ||
"If table exists do one of the following: " | ||
"Fail (do nothing), Replace (drop and recreate table) " | ||
"or Append (insert data)." | ||
), | ||
choices=[ | ||
("fail", _("Fail")), | ||
("replace", _("Replace")), | ||
("append", _("Append")), | ||
], | ||
validators=[DataRequired()], | ||
) | ||
# Optional JSON list restricting which columns are read from the file. | ||
usecols = JsonListField( | ||
_("Use Columns"), | ||
default=None, | ||
description=_( | ||
"Json list of the column names that should be read. " | ||
"If not None, only these columns will be read from the file." | ||
), | ||
validators=[Optional()], | ||
) | ||
# Whether the dataframe index is written out as a column. | ||
index = BooleanField( | ||
_("Dataframe Index"), description=_("Write dataframe index as a column.") | ||
) | ||
# Optional label(s) for the index column(s). | ||
index_label = StringField( | ||
_("Column Label(s)"), | ||
description=_( | ||
"Column label for index column(s). If None is given " | ||
"and Dataframe Index is True, Index Names are used." | ||
), | ||
validators=[Optional()], | ||
widget=BS3TextFieldWidget(), | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you provide a screenshot of the updated form UI?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a screenshot to the summary above