From 26f4ca489ecf1ab02e2c99a042f83da310c0480f Mon Sep 17 00:00:00 2001 From: IanCa Date: Thu, 11 May 2023 16:42:36 -0500 Subject: [PATCH] Remove werkzeug requirement, replace generate_filename --- docs/requirements.txt | 3 -- hed/tools/__init__.py | 2 +- hed/tools/remodeling/dispatcher.py | 4 +- hed/tools/util/io_util.py | 38 ++++---------- requirements.txt | 3 -- setup.cfg | 1 - tests/tools/util/test_io_util.py | 80 ++++++++---------------------- 7 files changed, 33 insertions(+), 98 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 9ecb73f32..ce85e15ce 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -7,6 +7,3 @@ portalocker>=2.4.0 semantic_version>=2.9.0 Sphinx>=5.2.2 sphinx_rtd_theme>=1.0.0 - -# This is just needed for secure_filename and should probably be removed -Werkzeug>=2.1.2 diff --git a/hed/tools/__init__.py b/hed/tools/__init__.py index fd1dfbbce..6d7b49e77 100644 --- a/hed/tools/__init__.py +++ b/hed/tools/__init__.py @@ -40,7 +40,7 @@ from .util.hed_logger import HedLogger from .util.data_util import get_new_dataframe, get_value_dict, replace_values, reorder_columns -from .util.io_util import check_filename, generate_filename, extract_suffix_path, get_file_list, make_path +from .util.io_util import check_filename, clean_filename, extract_suffix_path, get_file_list, make_path from .util.io_util import get_dir_dictionary, get_file_list, get_path_components, parse_bids_filename from .analysis import annotation_util diff --git a/hed/tools/remodeling/dispatcher.py b/hed/tools/remodeling/dispatcher.py index 59d467226..06d664e91 100644 --- a/hed/tools/remodeling/dispatcher.py +++ b/hed/tools/remodeling/dispatcher.py @@ -8,7 +8,7 @@ from hed.schema.hed_schema_io import get_schema from hed.tools.remodeling.backup_manager import BackupManager from hed.tools.remodeling.operations.valid_operations import valid_operations -from hed.tools.util.io_util import generate_filename, extract_suffix_path, get_timestamp +from hed.tools.util.io_util import clean_filename, extract_suffix_path, get_timestamp class Dispatcher: @@ -66,7 +66,7 @@ def get_summaries(self, file_formats=['.txt', '.json']): file_base = context_item.context_filename if self.data_root: file_base = extract_suffix_path(self.data_root, file_base) - file_base = generate_filename(file_base) + file_base = clean_filename(file_base) for file_format in file_formats: if file_format == '.txt': summary = context_item.get_text_summary(individual_summaries="consolidated") diff --git a/hed/tools/util/io_util.py b/hed/tools/util/io_util.py index 83bd68075..97489ce26 100644 --- a/hed/tools/util/io_util.py +++ b/hed/tools/util/io_util.py @@ -1,8 +1,8 @@ """Utilities for generating and handling file names.""" import os +import re from datetime import datetime -from werkzeug.utils import secure_filename from hed.errors.exceptions import HedFileError TIME_FORMAT = '%Y_%m_%d_T_%H_%M_%S_%f' @@ -92,39 +92,19 @@ def extract_suffix_path(path, prefix_path): return return_path -def generate_filename(base_name, name_prefix=None, name_suffix=None, extension=None, append_datetime=False): - """ Generate a filename for the attachment. +def clean_filename(filename): + """ Replaces invalid characters with under-bars Parameters: - base_name (str): Name of the base, usually the name of the file that the issues were generated from. - name_prefix (str): Prefix prepended to the front of the base name. - name_suffix (str): Suffix appended to the end of the base name. - extension (str): Extension to use. - append_datetime (bool): If True, append the current date-time to the base output filename. + filename (str): source filename Returns: - str: Name of the attachment other containing the issues. - - Notes: - - The form prefix_basename_suffix + extension. - + str: The filename with anything but alphanumeric, period, hyphens, and under-bars removed. """ - - pieces = [] - if name_prefix: - pieces = pieces + [name_prefix] - if base_name: - pieces.append(os.path.splitext(base_name)[0]) - if name_suffix: - pieces = pieces + [name_suffix] - filename = "".join(pieces) - if append_datetime: - now = datetime.now() - filename = filename + '_' + now.strftime(TIME_FORMAT)[:-3] - if filename and extension: - filename = filename + extension - - return secure_filename(filename) + if not filename: + return "" + out_name = re.sub(r'[^a-zA-Z0-9._-]+', '_', filename) + return out_name def get_dir_dictionary(dir_path, name_prefix=None, name_suffix=None, extensions=None, skip_empty=True, diff --git a/requirements.txt b/requirements.txt index ff7ce8bb7..02309238a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,3 @@ openpyxl>=3.0.9 pandas>=1.3.5 portalocker>=2.4.0 semantic_version>=2.9.0 - -# This is just needed for secure_filename and should probably be removed -Werkzeug>=2.1.2 diff --git a/setup.cfg b/setup.cfg index 88199acad..fbd9ad553 100644 --- a/setup.cfg +++ b/setup.cfg @@ -33,7 +33,6 @@ install_requires = pytz semantic-version six - Werkzeug [options.packages.find] diff --git a/tests/tools/util/test_io_util.py b/tests/tools/util/test_io_util.py index 5b05c2fd1..46373ad72 100644 --- a/tests/tools/util/test_io_util.py +++ b/tests/tools/util/test_io_util.py @@ -1,7 +1,7 @@ import os import unittest from hed.errors.exceptions import HedFileError -from hed.tools.util.io_util import check_filename, extract_suffix_path, generate_filename, \ +from hed.tools.util.io_util import check_filename, extract_suffix_path, clean_filename, \ get_dir_dictionary, get_file_list, get_path_components, parse_bids_filename, \ _split_entity, get_allowed, get_filtered_by_element @@ -46,77 +46,39 @@ def test_extract_suffix_path(self): suffix_path = extract_suffix_path('c:/myroot/temp.tsv', 'c:') self.assertTrue(suffix_path.endswith('temp.tsv'), "extract_suffix_path has the right path") - def test_generate_file_name(self): - file1 = generate_filename('mybase') + def test_clean_file_name(self): + file1 = clean_filename('mybase') self.assertEqual(file1, "mybase", "generate_file_name should return the base when other arguments not set") - file2 = generate_filename('mybase', name_prefix="prefix") - self.assertEqual(file2, "prefixmybase", "generate_file_name should return correct name when prefix set") - file3 = generate_filename('mybase', name_prefix="prefix", extension=".json") - self.assertEqual(file3, "prefixmybase.json", "generate_file_name should return correct name for extension") - file4 = generate_filename('mybase', name_suffix="suffix") - self.assertEqual(file4, "mybasesuffix", "generate_file_name should return correct name when suffix set") - file5 = generate_filename('mybase', name_suffix="suffix", extension=".json") - self.assertEqual(file5, "mybasesuffix.json", "generate_file_name should return correct name for extension") - file6 = generate_filename('mybase', name_prefix="prefix", name_suffix="suffix", extension=".json") - self.assertEqual(file6, "prefixmybasesuffix.json", - "generate_file_name should return correct name for all set") - filename = generate_filename(None, name_prefix=None, name_suffix=None, extension=None) - self.assertEqual('', filename, "Return empty when all arguments are none") - filename = generate_filename(None, name_prefix=None, name_suffix=None, extension='.txt') - self.assertEqual('', filename, - "Return empty when base_name, prefix, and suffix are None, but extension is not") - filename = generate_filename('c:/temp.json', name_prefix=None, name_suffix=None, extension='.txt') - self.assertEqual('c_temp.txt', filename, - "Returns stripped base_name + extension when prefix, and suffix are None") - filename = generate_filename('temp.json', name_prefix='prefix_', name_suffix='_suffix', extension='.txt') - self.assertEqual('prefix_temp_suffix.txt', filename, - "Return stripped base_name + extension when prefix, and suffix are None") - filename = generate_filename(None, name_prefix='prefix_', name_suffix='suffix', extension='.txt') - self.assertEqual('prefix_suffix.txt', filename, - "Returns correct string when no base_name") - filename = generate_filename('event-strategy-v3_task-matchingpennies_events.json', - name_suffix='_blech', extension='.txt') - self.assertEqual('event-strategy-v3_task-matchingpennies_events_blech.txt', filename, - "Returns correct string when base_name with hyphens") - filename = generate_filename('HED7.2.0.xml', name_suffix='_blech', extension='.txt') - self.assertEqual('HED7.2.0_blech.txt', filename, "Returns correct string when base_name has periods") - - def test_generate_file_name_with_date(self): - file1 = generate_filename('mybase') - file1t = generate_filename('mybase', append_datetime=True) - self.assertGreater(len(file1t), len(file1), "generate_file_name generates a longer file when datetime is used.") - # TODO convert more of these tests. - # self.assertEqual(file1, "mybase", "generate_file_name should return the base when other arguments not set") - # file2 = generate_filename('mybase', name_prefix="prefix") + # file2 = clean_filename('mybase', name_prefix="prefix") # self.assertEqual(file2, "prefixmybase", "generate_file_name should return correct name when prefix set") - # file3 = generate_filename('mybase', name_prefix="prefix", extension=".json") + # file3 = clean_filename('mybase', name_prefix="prefix", extension=".json") # self.assertEqual(file3, "prefixmybase.json", "generate_file_name should return correct name for extension") - # file4 = generate_filename('mybase', name_suffix="suffix") + # file4 = clean_filename('mybase', name_suffix="suffix") # self.assertEqual(file4, "mybasesuffix", "generate_file_name should return correct name when suffix set") - # file5 = generate_filename('mybase', name_suffix="suffix", extension=".json") + # file5 = clean_filename('mybase', name_suffix="suffix", extension=".json") # self.assertEqual(file5, "mybasesuffix.json", "generate_file_name should return correct name for extension") - # file6 = generate_filename('mybase', name_prefix="prefix", name_suffix="suffix", extension=".json") + # file6 = clean_filename('mybase', name_prefix="prefix", name_suffix="suffix", extension=".json") # self.assertEqual(file6, "prefixmybasesuffix.json", # "generate_file_name should return correct name for all set") - # filename = generate_filename(None, name_prefix=None, name_suffix=None, extension=None) - # self.assertEqual('', filename, "Return empty when all arguments are none") - # filename = generate_filename(None, name_prefix=None, name_suffix=None, extension='.txt') - # self.assertEqual('', filename, - # "Return empty when base_name, prefix, and suffix are None, but extension is not") - # filename = generate_filename('c:/temp.json', name_prefix=None, name_suffix=None, extension='.txt') - # self.assertEqual('c_temp.txt', filename, - # "Returns stripped base_name + extension when prefix, and suffix are None") - # filename = generate_filename('temp.json', name_prefix='prefix_', name_suffix='_suffix', extension='.txt') + filename = clean_filename("") + self.assertEqual('', filename, "Return empty when all arguments are none") + filename = clean_filename(None) + self.assertEqual('', filename, + "Return empty when base_name, prefix, and suffix are None, but extension is not") + filename = clean_filename('c:/temp.json') + self.assertEqual('c_temp.json', filename, + "Returns stripped base_name + extension when prefix, and suffix are None") + # filename = clean_filename('temp.json', name_prefix='prefix_', name_suffix='_suffix', extension='.txt') # self.assertEqual('prefix_temp_suffix.txt', filename, # "Return stripped base_name + extension when prefix, and suffix are None") - # filename = generate_filename(None, name_prefix='prefix_', name_suffix='suffix', extension='.txt') + # filename = clean_filename(None, name_prefix='prefix_', name_suffix='suffix', extension='.txt') # self.assertEqual('prefix_suffix.txt', filename, # "Returns correct string when no base_name") - # filename = generate_filename('event-strategy-v3_task-matchingpennies_events.json', - # name_suffix='_blech', extension='.txt') + # filename = clean_filename('event-strategy-v3_task-matchingpennies_events.json', + # name_suffix='_blech', extension='.txt') # self.assertEqual('event-strategy-v3_task-matchingpennies_events_blech.txt', filename, # "Returns correct string when base_name with hyphens") - # filename = generate_filename('HED7.2.0.xml', name_suffix='_blech', extension='.txt') + # filename = clean_filename('HED7.2.0.xml', name_suffix='_blech', extension='.txt') # self.assertEqual('HED7.2.0_blech.txt', filename, "Returns correct string when base_name has periods") def test_get_dir_dictionary(self):