From 1e82eb5ad6f71f07eeccbd3e974552b5b61df873 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CSamraHanifCareem=E2=80=9D?= <“samra.hanif@careem.com”> Date: Mon, 31 Oct 2022 01:48:39 +0500 Subject: [PATCH 1/5] feat: File Size Validation along with max and min rows to read added --- requirements/base.txt | 2 +- superset/config.py | 3 +++ superset/views/base.py | 5 ++++- superset/views/database/forms.py | 23 ++++++++++++++++++++--- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/requirements/base.txt b/requirements/base.txt index 8387d380bc5ab..5ae73c6a70ab5 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -96,7 +96,7 @@ flask-sqlalchemy==2.5.1 # flask-migrate flask-talisman==0.8.1 # via apache-superset -flask-wtf==0.14.3 +flask-wtf==1.0.1 # via # apache-superset # flask-appbuilder diff --git a/superset/config.py b/superset/config.py index 30f8bbc89341f..af89582a4d999 100644 --- a/superset/config.py +++ b/superset/config.py @@ -655,6 +655,9 @@ def _try_json_readsha(filepath: str, length: int) -> Optional[str]: CSV_EXTENSIONS = {"csv", "tsv", "txt"} COLUMNAR_EXTENSIONS = {"parquet", "zip"} ALLOWED_EXTENSIONS = {*EXCEL_EXTENSIONS, *CSV_EXTENSIONS, *COLUMNAR_EXTENSIONS} +CSV_MAX_SIZES = 1 * 1024 * 1024 +CSV_MAX_ROWS = 500 +CSV_MIN_ROWS = 1 # CSV Options: key/value pairs that will be passed as argument to DataFrame.to_csv # method. diff --git a/superset/views/base.py b/superset/views/base.py index 0f9bfc9e4063b..94abb87dd440c 100644 --- a/superset/views/base.py +++ b/superset/views/base.py @@ -80,7 +80,7 @@ from superset.utils import core as utils from superset.utils.core import get_user_id -from .utils import bootstrap_user_data +from superset.views.utils import bootstrap_user_data FRONTEND_CONF_KEYS = ( "SUPERSET_WEBSERVER_TIMEOUT", @@ -110,6 +110,9 @@ "CSV_EXTENSIONS", "COLUMNAR_EXTENSIONS", "ALLOWED_EXTENSIONS", + "CSV_MAX_SIZES", + "CSV_MAX_ROWS", + "CSV_MIN_ROWS", "SAMPLES_ROW_LIMIT", "DEFAULT_TIME_FILTER", ) diff --git a/superset/views/database/forms.py b/superset/views/database/forms.py index a44a412b483b0..7c13d3e80234f 100644 --- a/superset/views/database/forms.py +++ b/superset/views/database/forms.py @@ -20,7 +20,7 @@ from flask_appbuilder.fieldwidgets import BS3TextFieldWidget from flask_appbuilder.forms import DynamicForm from flask_babel import lazy_gettext as _ -from flask_wtf.file import FileAllowed, FileField, FileRequired +from flask_wtf.file import FileAllowed, FileField, FileRequired,FileSize from wtforms import ( BooleanField, IntegerField, @@ -119,6 +119,12 @@ class CsvToDatabaseForm(UploadToDatabaseForm): description=_("Select a CSV file to be uploaded to a database."), validators=[ FileRequired(), + FileSize( + config["CSV_MAX_SIZES"], + message="File size must not exceed the limit: " + + str(config["CSV_MAX_SIZES"] / 1048576) + + "MB", + ), FileAllowed( config["ALLOWED_EXTENSIONS"].intersection(config["CSV_EXTENSIONS"]), _( @@ -131,6 +137,7 @@ class CsvToDatabaseForm(UploadToDatabaseForm): ), ), ), + ], ) con = QuerySelectField( @@ -208,8 +215,18 @@ class CsvToDatabaseForm(UploadToDatabaseForm): ) nrows = IntegerField( _("Rows to Read"), - description=_("Number of rows of file to read."), - validators=[Optional(), NumberRange(min=0)], + description=_( + "Number of rows of file to read. Minimum " + + str(config["CSV_MIN_ROWS"]) + + " and Maximum " + + str(config["CSV_MAX_ROWS"]) + + " rows are allowed" + ), + validators=[ + Optional(), + NumberRange(min=config["CSV_MIN_ROWS"]), + NumberRange(max=config["CSV_MAX_ROWS"]), + ], widget=BS3TextFieldWidget(), ) skip_blank_lines = BooleanField( From fe750a7f52bee2ea0358b72674c2d5d9527853ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CSamraHanifCareem=E2=80=9D?= <“samra.hanif@careem.com”> Date: Wed, 4 Jan 2023 15:15:10 +0500 Subject: [PATCH 2/5] fix: max rows to read added to 1000 --- superset/config.py | 2 +- superset/views/database/forms.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/superset/config.py b/superset/config.py index af89582a4d999..63e17df6f70ce 100644 --- a/superset/config.py +++ b/superset/config.py @@ -656,7 +656,7 @@ def _try_json_readsha(filepath: str, length: int) -> Optional[str]: COLUMNAR_EXTENSIONS = {"parquet", "zip"} ALLOWED_EXTENSIONS = {*EXCEL_EXTENSIONS, *CSV_EXTENSIONS, *COLUMNAR_EXTENSIONS} CSV_MAX_SIZES = 1 * 1024 * 1024 -CSV_MAX_ROWS = 500 +CSV_MAX_ROWS = 1000 CSV_MIN_ROWS = 1 # CSV Options: key/value pairs that will be passed as argument to DataFrame.to_csv diff --git a/superset/views/database/forms.py b/superset/views/database/forms.py index 7c13d3e80234f..3765c83c08206 100644 --- a/superset/views/database/forms.py +++ b/superset/views/database/forms.py @@ -20,7 +20,7 @@ from flask_appbuilder.fieldwidgets import BS3TextFieldWidget from flask_appbuilder.forms import DynamicForm from flask_babel import lazy_gettext as _ -from flask_wtf.file import FileAllowed, FileField, FileRequired,FileSize +from flask_wtf.file import FileAllowed, FileField, FileRequired, FileSize from wtforms import ( BooleanField, IntegerField, @@ -137,7 +137,6 @@ class CsvToDatabaseForm(UploadToDatabaseForm): ), ), ), - ], ) con = QuerySelectField( @@ -215,7 +214,7 @@ class CsvToDatabaseForm(UploadToDatabaseForm): ) nrows = IntegerField( _("Rows to Read"), - description=_( + description=_( "Number of rows of file to read. Minimum " + str(config["CSV_MIN_ROWS"]) + " and Maximum " From 056df4fb19a7a69cc38e767ad156486ab56f3ff6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CSamraHanifCareem=E2=80=9D?= <“samra.hanif@careem.com”> Date: Wed, 4 Jan 2023 15:36:35 +0500 Subject: [PATCH 3/5] fix: merge conflicts resolved --- superset/views/database/forms.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/superset/views/database/forms.py b/superset/views/database/forms.py index 2d2b2a277a371..9aa084ce06117 100644 --- a/superset/views/database/forms.py +++ b/superset/views/database/forms.py @@ -208,12 +208,6 @@ class CsvToDatabaseForm(UploadToDatabaseForm): ), validators=[Optional()], ) - skip_initial_space = BooleanField( - _("Skip Initial Space"), description=_("Skip spaces after delimiter") - ) - skipinitialspace = BooleanField( - _("Skip Initial Space"), description=_("Skip spaces after delimiter.") - ) skiprows = IntegerField( _("Skip Rows"), description=_("Number of rows to skip at start of file."), @@ -236,6 +230,9 @@ class CsvToDatabaseForm(UploadToDatabaseForm): ], widget=BS3TextFieldWidget(), ) + skip_initial_space = BooleanField( + _("Skip Initial Space"), description=_("Skip spaces after delimiter") + ) skip_blank_lines = BooleanField( _("Skip Blank Lines"), description=_( From 3ac825fb26a354318253936dbf6d8a96687a8791 Mon Sep 17 00:00:00 2001 From: SamraHanifCareem Date: Sat, 7 Jan 2023 15:28:49 +0500 Subject: [PATCH 4/5] fix: validations added according to new template --- superset/views/database/forms.py | 78 +++++++++----------------------- 1 file changed, 21 insertions(+), 57 deletions(-) diff --git a/superset/views/database/forms.py b/superset/views/database/forms.py index 9aa084ce06117..612574f71f13c 100644 --- a/superset/views/database/forms.py +++ b/superset/views/database/forms.py @@ -107,7 +107,15 @@ def is_engine_allowed_to_file_upl(database: Database) -> bool: class CsvToDatabaseForm(UploadToDatabaseForm): csv_file = FileField( _("CSV Upload"), - description=_("Select a file to be uploaded to the database"), + description=_( + "Select a file to be uploaded to a database. Max Size of the file should be " + + str(config["CSV_MAX_SIZES"] / 1048576) + + " MB and the accepted extensions are: " + "%(allowed_extensions)s", + allowed_extensions=", ".join( + config["ALLOWED_EXTENSIONS"].intersection(config["CSV_EXTENSIONS"]) + ), + ), validators=[ FileRequired(), FileSize( @@ -176,60 +184,6 @@ class CsvToDatabaseForm(UploadToDatabaseForm): ], validators=[DataRequired()], ) - header = IntegerField( - _("Header Row"), - description=_( - "Row containing the headers to use as " - "column names (0 is first line of data). " - "Leave empty if there is no header row." - ), - validators=[Optional(), NumberRange(min=0)], - widget=BS3TextFieldWidget(), - ) - index_col = IntegerField( - _("Index Column"), - description=_( - "Column to use as the row labels of the " - "dataframe. Leave empty if no index column." - ), - validators=[Optional(), NumberRange(min=0)], - widget=BS3TextFieldWidget(), - ) - mangle_dupe_cols = BooleanField( - _("Mangle Duplicate Columns"), - description=_('Specify duplicate columns as "X.0, X.1".'), - ) - usecols = JsonListField( - _("Use Columns"), - default=None, - description=_( - "Json list of the column names that should be read. " - "If not None, only these columns will be read from the file." - ), - validators=[Optional()], - ) - skiprows = IntegerField( - _("Skip Rows"), - description=_("Number of rows to skip at start of file."), - validators=[Optional(), NumberRange(min=0)], - widget=BS3TextFieldWidget(), - ) - nrows = IntegerField( - _("Rows to Read"), - description=_( - "Number of rows of file to read. Minimum " - + str(config["CSV_MIN_ROWS"]) - + " and Maximum " - + str(config["CSV_MAX_ROWS"]) - + " rows are allowed" - ), - validators=[ - Optional(), - NumberRange(min=config["CSV_MIN_ROWS"]), - NumberRange(max=config["CSV_MAX_ROWS"]), - ], - widget=BS3TextFieldWidget(), - ) skip_initial_space = BooleanField( _("Skip Initial Space"), description=_("Skip spaces after delimiter") ) @@ -312,8 +266,18 @@ class CsvToDatabaseForm(UploadToDatabaseForm): ) nrows = IntegerField( _("Rows to Read"), - description=_("Number of rows of file to read"), - validators=[Optional(), NumberRange(min=0)], + description=_( + "Number of rows of file to read. Minimum " + + str(config["CSV_MIN_ROWS"]) + + " and Maximum " + + str(config["CSV_MAX_ROWS"]) + + " rows are allowed" + ), + validators=[ + Optional(), + NumberRange(min=config["CSV_MIN_ROWS"]), + NumberRange(max=config["CSV_MAX_ROWS"]), + ], widget=BS3TextFieldWidget(), ) skiprows = IntegerField( From 4a50ece5846fb52d73f5cf425a16ebaa4d78b85a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CSamraHanifCareem=E2=80=9D?= <“samra.hanif@careem.com”> Date: Wed, 11 Jan 2023 02:20:12 +0500 Subject: [PATCH 5/5] fix: sorting of imports added through isort --- superset/views/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/superset/views/base.py b/superset/views/base.py index bb07fc253a149..afc66453032fd 100644 --- a/superset/views/base.py +++ b/superset/views/base.py @@ -79,7 +79,6 @@ from superset.translations.utils import get_language_pack from superset.utils import core as utils from superset.utils.core import get_user_id - from superset.views.utils import bootstrap_user_data FRONTEND_CONF_KEYS = (