From a07257055d2b36dbd536f7a58113c550543554df Mon Sep 17 00:00:00 2001 From: jsconan Date: Fri, 27 Oct 2023 09:58:09 +0200 Subject: [PATCH 01/35] doc: fix the link to the documentation in the readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2fa7e6b..67bf416 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ pip install --upgrade git+ssh://git@github.com/cerbernetix/py-toolbox.git@develo `py-toolbox` offers several utilities per domain. -Please refer to the [documentation](./docs/README.md) for more information. +Please refer to the [documentation](https://github.com/cerbernetix/py-toolbox/blob/main/docs/README.md) for more information. ## Development From c721aa7cec88548a73259c7f9f9ebefdf6daaf32 Mon Sep 17 00:00:00 2001 From: jsconan Date: Fri, 27 Oct 2023 09:58:51 +0200 Subject: [PATCH 02/35] chore: update the changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b039bf6..2e7b13f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed + +- Fix the link to the documentation in the readme. 
+ ## [0.9.1] - 2023-10-27 ### Fixed From 426775ab841106f9b4656a6d676d9d507ffd3f1a Mon Sep 17 00:00:00 2001 From: jsconan Date: Sun, 11 Feb 2024 17:17:40 +0100 Subject: [PATCH 03/35] feat: add an option for sorting the keys in JSON files --- src/cerbernetix/toolbox/files/__init__.py | 2 ++ src/cerbernetix/toolbox/files/json_file.py | 13 ++++++- tests/files/test_json_file.py | 41 +++++++++++++++++++++- 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/src/cerbernetix/toolbox/files/__init__.py b/src/cerbernetix/toolbox/files/__init__.py index 7c84fdf..064687f 100644 --- a/src/cerbernetix/toolbox/files/__init__.py +++ b/src/cerbernetix/toolbox/files/__init__.py @@ -64,6 +64,7 @@ csv_data = file.read_zip_csv(data) ``` """ + from cerbernetix.toolbox.files.csv_file import ( CSV_DIALECT, CSV_ENCODING, @@ -83,6 +84,7 @@ from cerbernetix.toolbox.files.json_file import ( JSON_ENCODING, JSON_INDENT, + JSON_SORT_KEYS, JSONFile, read_json_file, write_json_file, diff --git a/src/cerbernetix/toolbox/files/json_file.py b/src/cerbernetix/toolbox/files/json_file.py index 58aefb2..51d69c6 100644 --- a/src/cerbernetix/toolbox/files/json_file.py +++ b/src/cerbernetix/toolbox/files/json_file.py @@ -35,6 +35,7 @@ json_data = file.read() ``` """ + import json from typing import Any @@ -46,6 +47,9 @@ # The default indent for JSON files JSON_INDENT = 4 +# The default value for whether or not to sort the keys in JSON files +JSON_SORT_KEYS = False + class JSONFile(FileManager): """Offers a simple API for reading and writing JSON files. @@ -60,6 +64,7 @@ class JSONFile(FileManager): binary (bool): The type of file, say text. It must always be False. encoding (str, optional): The file encoding. indent (int, optional): The line indent. + sort_keys (bool, optional): Whether or not to sort the keys. 
Examples: ```python @@ -92,6 +97,7 @@ def __init__( write: bool = False, encoding: str = JSON_ENCODING, indent: int = JSON_INDENT, + sort_keys: bool = JSON_SORT_KEYS, **kwargs, ): """Creates a file manager for JSON files. @@ -108,6 +114,7 @@ def __init__( Defaults to False. encoding (str, optional): The file encoding. Defaults to JSON_ENCODING. indent (int, optional): The line indent. Defaults to JSON_INDENT. + sort_keys (bool, optional): Whether or not to sort the keys. Defaults to JSON_SORT_KEYS. Examples: ```python @@ -153,6 +160,7 @@ def __init__( **kwargs, ) self.indent = indent + self.sort_keys = sort_keys def read(self) -> Any: """Reads the content from the file. @@ -212,7 +220,7 @@ def write(self, data: Any) -> int: """ return super().write( json.JSONEncoder( - sort_keys=True, + sort_keys=self.sort_keys, indent=self.indent, ).encode(data) ) @@ -251,6 +259,7 @@ def write_json_file( data: Any, encoding: str = JSON_ENCODING, indent: int = JSON_INDENT, + sort_keys: bool = JSON_SORT_KEYS, **kwargs, ) -> int: """Writes a JSON content to a file. @@ -260,6 +269,7 @@ def write_json_file( data (Any): The content to write to the file. encoding (str, optional): The file encoding. Defaults to JSON_ENCODING. indent (int, optional): The line indent. Defaults to JSON_INDENT. + sort_keys (bool, optional): Whether or not to sort the keys. Defaults to JSON_SORT_KEYS. Raises: OSError: If the file cannot be written. 
@@ -284,5 +294,6 @@ def write_json_file( filename, encoding=encoding, indent=indent, + sort_keys=sort_keys, **kwargs, ).write_file(data) diff --git a/tests/files/test_json_file.py b/tests/files/test_json_file.py index 6d7c2ab..b16b2f9 100644 --- a/tests/files/test_json_file.py +++ b/tests/files/test_json_file.py @@ -1,10 +1,12 @@ """Test the class for reading and writing JSON files.""" + import unittest from unittest.mock import Mock, patch from cerbernetix.toolbox.files import ( JSON_ENCODING, JSON_INDENT, + JSON_SORT_KEYS, JSONFile, read_json_file, write_json_file, @@ -12,6 +14,15 @@ JSON_DATA = {"name": "test", "level": 20, "keywords": ["one", "two"], "enabled": True} JSON_STRING = """{ + "name": "test", + "level": 20, + "keywords": [ + "one", + "two" + ], + "enabled": true +}""" +JSON_STRING_SORTED = """{ "enabled": true, "keywords": [ "one", @@ -36,6 +47,7 @@ def test_construction_default(self): self.assertEqual(file.filename, file_path) self.assertFalse(file.binary) self.assertEqual(file.indent, JSON_INDENT) + self.assertEqual(file.sort_keys, JSON_SORT_KEYS) self.assertEqual(file.encoding, JSON_ENCODING) self.assertIsNone(file._file) self.assertEqual(file._open_args, {}) @@ -45,13 +57,21 @@ def test_construction_params(self): file_path = "/root/folder/file" encoding = "ascii" indent = 2 + sort_keys = True newline = "\n" - file = JSONFile(file_path, encoding=encoding, indent=indent, newline=newline) + file = JSONFile( + file_path, + encoding=encoding, + indent=indent, + sort_keys=sort_keys, + newline=newline, + ) self.assertEqual(file.filename, file_path) self.assertFalse(file.binary) self.assertEqual(file.indent, indent) + self.assertEqual(file.sort_keys, sort_keys) self.assertEqual(file.encoding, encoding) self.assertIsNone(file._file) self.assertEqual(file._open_args, {"newline": newline}) @@ -217,6 +237,25 @@ def test_write_file(self, mock_file_open): mock_file.write.assert_called_with(JSON_STRING) mock_file.close.assert_called_once() + 
@patch("builtins.open") + def test_write_file_sorted_keys(self, mock_file_open): + """Tests a file can be written at once.""" + file_path = "/root/folder/file" + + count = len(JSON_STRING) + mock_file = Mock() + mock_file.write = Mock(return_value=count) + mock_file.close = Mock() + mock_file_open.return_value = mock_file + + file = JSONFile(file_path, sort_keys=True) + + self.assertEqual(file.write_file(JSON_DATA), count) + + mock_file_open.assert_called_once() + mock_file.write.assert_called_with(JSON_STRING_SORTED) + mock_file.close.assert_called_once() + @patch("builtins.open") def test_read(self, mock_file_open): """Tests a file can be read.""" From 5232b6f8359f20ebc99643708dd8431e4968af20 Mon Sep 17 00:00:00 2001 From: jsconan Date: Sun, 11 Feb 2024 17:24:37 +0100 Subject: [PATCH 04/35] feat: add an option for skipping non conform keys in JSON files --- src/cerbernetix/toolbox/files/__init__.py | 1 + src/cerbernetix/toolbox/files/json_file.py | 13 +++++++++++++ tests/files/test_json_file.py | 5 +++++ 3 files changed, 19 insertions(+) diff --git a/src/cerbernetix/toolbox/files/__init__.py b/src/cerbernetix/toolbox/files/__init__.py index 064687f..41d81e1 100644 --- a/src/cerbernetix/toolbox/files/__init__.py +++ b/src/cerbernetix/toolbox/files/__init__.py @@ -84,6 +84,7 @@ from cerbernetix.toolbox.files.json_file import ( JSON_ENCODING, JSON_INDENT, + JSON_SKIP_KEYS, JSON_SORT_KEYS, JSONFile, read_json_file, diff --git a/src/cerbernetix/toolbox/files/json_file.py b/src/cerbernetix/toolbox/files/json_file.py index 51d69c6..fa904ab 100644 --- a/src/cerbernetix/toolbox/files/json_file.py +++ b/src/cerbernetix/toolbox/files/json_file.py @@ -50,6 +50,9 @@ # The default value for whether or not to sort the keys in JSON files JSON_SORT_KEYS = False +# The default value for whether or not to skip the keys not having an allowed type in JSON files +JSON_SKIP_KEYS = False + class JSONFile(FileManager): """Offers a simple API for reading and writing JSON files. 
@@ -65,6 +68,7 @@ class JSONFile(FileManager): encoding (str, optional): The file encoding. indent (int, optional): The line indent. sort_keys (bool, optional): Whether or not to sort the keys. + skip_keys (bool, optional): Whether or not to skip the keys not having an allowed type. Examples: ```python @@ -98,6 +102,7 @@ def __init__( encoding: str = JSON_ENCODING, indent: int = JSON_INDENT, sort_keys: bool = JSON_SORT_KEYS, + skip_keys: bool = JSON_SKIP_KEYS, **kwargs, ): """Creates a file manager for JSON files. @@ -115,6 +120,8 @@ def __init__( encoding (str, optional): The file encoding. Defaults to JSON_ENCODING. indent (int, optional): The line indent. Defaults to JSON_INDENT. sort_keys (bool, optional): Whether or not to sort the keys. Defaults to JSON_SORT_KEYS. + skip_keys (bool, optional): Whether or not to skip the keys not having an allowed type. + Defaults to JSON_SKIP_KEYS. Examples: ```python @@ -161,6 +168,7 @@ def __init__( ) self.indent = indent self.sort_keys = sort_keys + self.skip_keys = skip_keys def read(self) -> Any: """Reads the content from the file. @@ -220,6 +228,7 @@ def write(self, data: Any) -> int: """ return super().write( json.JSONEncoder( + skipkeys=self.skip_keys, sort_keys=self.sort_keys, indent=self.indent, ).encode(data) @@ -260,6 +269,7 @@ def write_json_file( encoding: str = JSON_ENCODING, indent: int = JSON_INDENT, sort_keys: bool = JSON_SORT_KEYS, + skip_keys: bool = JSON_SKIP_KEYS, **kwargs, ) -> int: """Writes a JSON content to a file. @@ -270,6 +280,8 @@ def write_json_file( encoding (str, optional): The file encoding. Defaults to JSON_ENCODING. indent (int, optional): The line indent. Defaults to JSON_INDENT. sort_keys (bool, optional): Whether or not to sort the keys. Defaults to JSON_SORT_KEYS. + skip_keys (bool, optional): Whether or not to skip the keys not having an allowed type. + Defaults to JSON_SKIP_KEYS. Raises: OSError: If the file cannot be written. 
@@ -295,5 +307,6 @@ def write_json_file( encoding=encoding, indent=indent, sort_keys=sort_keys, + skip_keys=skip_keys, **kwargs, ).write_file(data) diff --git a/tests/files/test_json_file.py b/tests/files/test_json_file.py index b16b2f9..22715be 100644 --- a/tests/files/test_json_file.py +++ b/tests/files/test_json_file.py @@ -6,6 +6,7 @@ from cerbernetix.toolbox.files import ( JSON_ENCODING, JSON_INDENT, + JSON_SKIP_KEYS, JSON_SORT_KEYS, JSONFile, read_json_file, @@ -48,6 +49,7 @@ def test_construction_default(self): self.assertFalse(file.binary) self.assertEqual(file.indent, JSON_INDENT) self.assertEqual(file.sort_keys, JSON_SORT_KEYS) + self.assertEqual(file.skip_keys, JSON_SKIP_KEYS) self.assertEqual(file.encoding, JSON_ENCODING) self.assertIsNone(file._file) self.assertEqual(file._open_args, {}) @@ -58,6 +60,7 @@ def test_construction_params(self): encoding = "ascii" indent = 2 sort_keys = True + skip_keys = True newline = "\n" file = JSONFile( @@ -65,6 +68,7 @@ def test_construction_params(self): encoding=encoding, indent=indent, sort_keys=sort_keys, + skip_keys=skip_keys, newline=newline, ) @@ -72,6 +76,7 @@ def test_construction_params(self): self.assertFalse(file.binary) self.assertEqual(file.indent, indent) self.assertEqual(file.sort_keys, sort_keys) + self.assertEqual(file.skip_keys, skip_keys) self.assertEqual(file.encoding, encoding) self.assertIsNone(file._file) self.assertEqual(file._open_args, {"newline": newline}) From 9e27af04ec8ecfa3ff925d370417012d67bae42f Mon Sep 17 00:00:00 2001 From: jsconan Date: Sun, 11 Feb 2024 17:28:31 +0100 Subject: [PATCH 05/35] feat: add an option for escaping non-ascii chars in JSON files --- src/cerbernetix/toolbox/files/__init__.py | 1 + src/cerbernetix/toolbox/files/json_file.py | 13 +++++++++++++ tests/files/test_json_file.py | 5 +++++ 3 files changed, 19 insertions(+) diff --git a/src/cerbernetix/toolbox/files/__init__.py b/src/cerbernetix/toolbox/files/__init__.py index 41d81e1..5ab66e4 100644 --- 
a/src/cerbernetix/toolbox/files/__init__.py +++ b/src/cerbernetix/toolbox/files/__init__.py @@ -83,6 +83,7 @@ from cerbernetix.toolbox.files.file_manager import FileManager from cerbernetix.toolbox.files.json_file import ( JSON_ENCODING, + JSON_ENSURE_ASCII, JSON_INDENT, JSON_SKIP_KEYS, JSON_SORT_KEYS, diff --git a/src/cerbernetix/toolbox/files/json_file.py b/src/cerbernetix/toolbox/files/json_file.py index fa904ab..6411ecc 100644 --- a/src/cerbernetix/toolbox/files/json_file.py +++ b/src/cerbernetix/toolbox/files/json_file.py @@ -53,6 +53,9 @@ # The default value for whether or not to skip the keys not having an allowed type in JSON files JSON_SKIP_KEYS = False +# The default value for escaping non-ascii chars in JSON files +JSON_ENSURE_ASCII = True + class JSONFile(FileManager): """Offers a simple API for reading and writing JSON files. @@ -69,6 +72,7 @@ class JSONFile(FileManager): indent (int, optional): The line indent. sort_keys (bool, optional): Whether or not to sort the keys. skip_keys (bool, optional): Whether or not to skip the keys not having an allowed type. + ensure_ascii (bool, optional): Whether or not to escape non-ascii chars. Examples: ```python @@ -103,6 +107,7 @@ def __init__( indent: int = JSON_INDENT, sort_keys: bool = JSON_SORT_KEYS, skip_keys: bool = JSON_SKIP_KEYS, + ensure_ascii: bool = JSON_ENSURE_ASCII, **kwargs, ): """Creates a file manager for JSON files. @@ -122,6 +127,8 @@ def __init__( sort_keys (bool, optional): Whether or not to sort the keys. Defaults to JSON_SORT_KEYS. skip_keys (bool, optional): Whether or not to skip the keys not having an allowed type. Defaults to JSON_SKIP_KEYS. + ensure_ascii (bool, optional): Whether or not to escape non-ascii chars. + Defaults to JSON_ENSURE_ASCII. Examples: ```python @@ -169,6 +176,7 @@ def __init__( self.indent = indent self.sort_keys = sort_keys self.skip_keys = skip_keys + self.ensure_ascii = ensure_ascii def read(self) -> Any: """Reads the content from the file. 
@@ -229,6 +237,7 @@ def write(self, data: Any) -> int: return super().write( json.JSONEncoder( skipkeys=self.skip_keys, + ensure_ascii=self.ensure_ascii, sort_keys=self.sort_keys, indent=self.indent, ).encode(data) @@ -270,6 +279,7 @@ def write_json_file( indent: int = JSON_INDENT, sort_keys: bool = JSON_SORT_KEYS, skip_keys: bool = JSON_SKIP_KEYS, + ensure_ascii: bool = JSON_ENSURE_ASCII, **kwargs, ) -> int: """Writes a JSON content to a file. @@ -282,6 +292,8 @@ def write_json_file( sort_keys (bool, optional): Whether or not to sort the keys. Defaults to JSON_SORT_KEYS. skip_keys (bool, optional): Whether or not to skip the keys not having an allowed type. Defaults to JSON_SKIP_KEYS. + ensure_ascii (bool, optional): Whether or not to escape non-ascii chars. + Defaults to JSON_ENSURE_ASCII. Raises: OSError: If the file cannot be written. @@ -308,5 +320,6 @@ def write_json_file( indent=indent, sort_keys=sort_keys, skip_keys=skip_keys, + ensure_ascii=ensure_ascii, **kwargs, ).write_file(data) diff --git a/tests/files/test_json_file.py b/tests/files/test_json_file.py index 22715be..af409e5 100644 --- a/tests/files/test_json_file.py +++ b/tests/files/test_json_file.py @@ -5,6 +5,7 @@ from cerbernetix.toolbox.files import ( JSON_ENCODING, + JSON_ENSURE_ASCII, JSON_INDENT, JSON_SKIP_KEYS, JSON_SORT_KEYS, @@ -50,6 +51,7 @@ def test_construction_default(self): self.assertEqual(file.indent, JSON_INDENT) self.assertEqual(file.sort_keys, JSON_SORT_KEYS) self.assertEqual(file.skip_keys, JSON_SKIP_KEYS) + self.assertEqual(file.ensure_ascii, JSON_ENSURE_ASCII) self.assertEqual(file.encoding, JSON_ENCODING) self.assertIsNone(file._file) self.assertEqual(file._open_args, {}) @@ -61,6 +63,7 @@ def test_construction_params(self): indent = 2 sort_keys = True skip_keys = True + ensure_ascii = True newline = "\n" file = JSONFile( @@ -69,6 +72,7 @@ def test_construction_params(self): indent=indent, sort_keys=sort_keys, skip_keys=skip_keys, + ensure_ascii=ensure_ascii, newline=newline, 
) @@ -77,6 +81,7 @@ def test_construction_params(self): self.assertEqual(file.indent, indent) self.assertEqual(file.sort_keys, sort_keys) self.assertEqual(file.skip_keys, skip_keys) + self.assertEqual(file.ensure_ascii, ensure_ascii) self.assertEqual(file.encoding, encoding) self.assertIsNone(file._file) self.assertEqual(file._open_args, {"newline": newline}) From e4ff6c6915bdde965e0736a10497b9d01c990912 Mon Sep 17 00:00:00 2001 From: jsconan Date: Sun, 11 Feb 2024 17:30:12 +0100 Subject: [PATCH 06/35] feat: add an option for forbidding control chars in JSON files --- src/cerbernetix/toolbox/files/__init__.py | 1 + src/cerbernetix/toolbox/files/json_file.py | 14 ++++++++++++-- tests/files/test_json_file.py | 5 +++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/cerbernetix/toolbox/files/__init__.py b/src/cerbernetix/toolbox/files/__init__.py index 5ab66e4..d168923 100644 --- a/src/cerbernetix/toolbox/files/__init__.py +++ b/src/cerbernetix/toolbox/files/__init__.py @@ -87,6 +87,7 @@ JSON_INDENT, JSON_SKIP_KEYS, JSON_SORT_KEYS, + JSON_STRICT, JSONFile, read_json_file, write_json_file, diff --git a/src/cerbernetix/toolbox/files/json_file.py b/src/cerbernetix/toolbox/files/json_file.py index 6411ecc..140eb0d 100644 --- a/src/cerbernetix/toolbox/files/json_file.py +++ b/src/cerbernetix/toolbox/files/json_file.py @@ -56,6 +56,9 @@ # The default value for escaping non-ascii chars in JSON files JSON_ENSURE_ASCII = True +# The default value for forbidding the control chars in JSON files +JSON_STRICT = True + class JSONFile(FileManager): """Offers a simple API for reading and writing JSON files. @@ -73,6 +76,7 @@ class JSONFile(FileManager): sort_keys (bool, optional): Whether or not to sort the keys. skip_keys (bool, optional): Whether or not to skip the keys not having an allowed type. ensure_ascii (bool, optional): Whether or not to escape non-ascii chars. + strict (bool, optional): Whether or not to forbid control chars. 
Examples: ```python @@ -108,6 +112,7 @@ def __init__( sort_keys: bool = JSON_SORT_KEYS, skip_keys: bool = JSON_SKIP_KEYS, ensure_ascii: bool = JSON_ENSURE_ASCII, + strict: bool = JSON_STRICT, **kwargs, ): """Creates a file manager for JSON files. @@ -129,6 +134,8 @@ def __init__( Defaults to JSON_SKIP_KEYS. ensure_ascii (bool, optional): Whether or not to escape non-ascii chars. Defaults to JSON_ENSURE_ASCII. + strict (bool, optional): Whether or not to forbid control chars. + Defaults to JSON_STRICT. Examples: ```python @@ -177,6 +184,7 @@ def __init__( self.sort_keys = sort_keys self.skip_keys = skip_keys self.ensure_ascii = ensure_ascii + self.strict = strict def read(self) -> Any: """Reads the content from the file. @@ -206,7 +214,7 @@ def read(self) -> Any: if not data: return None - return json.JSONDecoder().decode(data) + return json.JSONDecoder(strict=self.strict).decode(data) def write(self, data: Any) -> int: """Writes content to the file. @@ -247,6 +255,7 @@ def write(self, data: Any) -> int: def read_json_file( filename: str, encoding: str = JSON_ENCODING, + strict: bool = JSON_STRICT, **kwargs, ) -> Any: """Reads a JSON content from a file. @@ -254,6 +263,7 @@ def read_json_file( Args: filename (str): The path to the file to read. encoding (str, optional): The file encoding. Defaults to JSON_ENCODING. + strict (bool, optional): Whether or not to forbid control chars. Defaults to JSON_STRICT. Raises: OSError: If the file cannot be read. 
@@ -269,7 +279,7 @@ def read_json_file( json_data = read_json_file('path/to/file', encoding='UTF-8') ``` """ - return JSONFile(filename, encoding=encoding, **kwargs).read_file() + return JSONFile(filename, encoding=encoding, strict=strict, **kwargs).read_file() def write_json_file( diff --git a/tests/files/test_json_file.py b/tests/files/test_json_file.py index af409e5..3eade6d 100644 --- a/tests/files/test_json_file.py +++ b/tests/files/test_json_file.py @@ -9,6 +9,7 @@ JSON_INDENT, JSON_SKIP_KEYS, JSON_SORT_KEYS, + JSON_STRICT, JSONFile, read_json_file, write_json_file, @@ -52,6 +53,7 @@ def test_construction_default(self): self.assertEqual(file.sort_keys, JSON_SORT_KEYS) self.assertEqual(file.skip_keys, JSON_SKIP_KEYS) self.assertEqual(file.ensure_ascii, JSON_ENSURE_ASCII) + self.assertEqual(file.strict, JSON_STRICT) self.assertEqual(file.encoding, JSON_ENCODING) self.assertIsNone(file._file) self.assertEqual(file._open_args, {}) @@ -64,6 +66,7 @@ def test_construction_params(self): sort_keys = True skip_keys = True ensure_ascii = True + strict = True newline = "\n" file = JSONFile( @@ -73,6 +76,7 @@ def test_construction_params(self): sort_keys=sort_keys, skip_keys=skip_keys, ensure_ascii=ensure_ascii, + strict=strict, newline=newline, ) @@ -82,6 +86,7 @@ def test_construction_params(self): self.assertEqual(file.sort_keys, sort_keys) self.assertEqual(file.skip_keys, skip_keys) self.assertEqual(file.ensure_ascii, ensure_ascii) + self.assertEqual(file.strict, strict) self.assertEqual(file.encoding, encoding) self.assertIsNone(file._file) self.assertEqual(file._open_args, {"newline": newline}) From 3312c221f3508f70bcb8d535cbbd872b9a1160a4 Mon Sep 17 00:00:00 2001 From: jsconan Date: Sun, 11 Feb 2024 17:45:45 +0100 Subject: [PATCH 07/35] feat: add an option for defining the separators in JSON files --- src/cerbernetix/toolbox/files/__init__.py | 1 + src/cerbernetix/toolbox/files/json_file.py | 13 +++++++++++ tests/files/test_json_file.py | 25 ++++++++++++++++++++++ 
3 files changed, 39 insertions(+) diff --git a/src/cerbernetix/toolbox/files/__init__.py b/src/cerbernetix/toolbox/files/__init__.py index d168923..9be61a7 100644 --- a/src/cerbernetix/toolbox/files/__init__.py +++ b/src/cerbernetix/toolbox/files/__init__.py @@ -85,6 +85,7 @@ JSON_ENCODING, JSON_ENSURE_ASCII, JSON_INDENT, + JSON_SEPARATORS, JSON_SKIP_KEYS, JSON_SORT_KEYS, JSON_STRICT, diff --git a/src/cerbernetix/toolbox/files/json_file.py b/src/cerbernetix/toolbox/files/json_file.py index 140eb0d..06ea5b7 100644 --- a/src/cerbernetix/toolbox/files/json_file.py +++ b/src/cerbernetix/toolbox/files/json_file.py @@ -47,6 +47,9 @@ # The default indent for JSON files JSON_INDENT = 4 +# The default separators for JSON files +JSON_SEPARATORS = None + # The default value for whether or not to sort the keys in JSON files JSON_SORT_KEYS = False @@ -73,6 +76,7 @@ class JSONFile(FileManager): binary (bool): The type of file, say text. It must always be False. encoding (str, optional): The file encoding. indent (int, optional): The line indent. + separators (tuple, optional): The separators for key/values, a.k.a `(', ', ': ')`. sort_keys (bool, optional): Whether or not to sort the keys. skip_keys (bool, optional): Whether or not to skip the keys not having an allowed type. ensure_ascii (bool, optional): Whether or not to escape non-ascii chars. @@ -109,6 +113,7 @@ def __init__( write: bool = False, encoding: str = JSON_ENCODING, indent: int = JSON_INDENT, + separators: tuple = JSON_SEPARATORS, sort_keys: bool = JSON_SORT_KEYS, skip_keys: bool = JSON_SKIP_KEYS, ensure_ascii: bool = JSON_ENSURE_ASCII, @@ -129,6 +134,8 @@ def __init__( Defaults to False. encoding (str, optional): The file encoding. Defaults to JSON_ENCODING. indent (int, optional): The line indent. Defaults to JSON_INDENT. + separators (tuple, optional): The separators for key/values, a.k.a `(', ', ': ')`. + Defaults to JSON_SEPARATORS. sort_keys (bool, optional): Whether or not to sort the keys. 
Defaults to JSON_SORT_KEYS. skip_keys (bool, optional): Whether or not to skip the keys not having an allowed type. Defaults to JSON_SKIP_KEYS. @@ -181,6 +188,7 @@ def __init__( **kwargs, ) self.indent = indent + self.separators = separators self.sort_keys = sort_keys self.skip_keys = skip_keys self.ensure_ascii = ensure_ascii @@ -248,6 +256,7 @@ def write(self, data: Any) -> int: ensure_ascii=self.ensure_ascii, sort_keys=self.sort_keys, indent=self.indent, + separators=self.separators, ).encode(data) ) @@ -287,6 +296,7 @@ def write_json_file( data: Any, encoding: str = JSON_ENCODING, indent: int = JSON_INDENT, + separators: tuple = JSON_SEPARATORS, sort_keys: bool = JSON_SORT_KEYS, skip_keys: bool = JSON_SKIP_KEYS, ensure_ascii: bool = JSON_ENSURE_ASCII, @@ -299,6 +309,8 @@ def write_json_file( data (Any): The content to write to the file. encoding (str, optional): The file encoding. Defaults to JSON_ENCODING. indent (int, optional): The line indent. Defaults to JSON_INDENT. + separators (tuple, optional): The separators for key/values, a.k.a `(', ', ': ')`. + Defaults to JSON_SEPARATORS. sort_keys (bool, optional): Whether or not to sort the keys. Defaults to JSON_SORT_KEYS. skip_keys (bool, optional): Whether or not to skip the keys not having an allowed type. Defaults to JSON_SKIP_KEYS. 
@@ -328,6 +340,7 @@ def write_json_file( filename, encoding=encoding, indent=indent, + separators=separators, sort_keys=sort_keys, skip_keys=skip_keys, ensure_ascii=ensure_ascii, diff --git a/tests/files/test_json_file.py b/tests/files/test_json_file.py index 3eade6d..2b42e59 100644 --- a/tests/files/test_json_file.py +++ b/tests/files/test_json_file.py @@ -7,6 +7,7 @@ JSON_ENCODING, JSON_ENSURE_ASCII, JSON_INDENT, + JSON_SEPARATORS, JSON_SKIP_KEYS, JSON_SORT_KEYS, JSON_STRICT, @@ -25,6 +26,7 @@ ], "enabled": true }""" +JSON_STRING_PACKED = """{"name":"test","level":20,"keywords":["one","two"],"enabled":true}""" JSON_STRING_SORTED = """{ "enabled": true, "keywords": [ @@ -50,6 +52,7 @@ def test_construction_default(self): self.assertEqual(file.filename, file_path) self.assertFalse(file.binary) self.assertEqual(file.indent, JSON_INDENT) + self.assertEqual(file.separators, JSON_SEPARATORS) self.assertEqual(file.sort_keys, JSON_SORT_KEYS) self.assertEqual(file.skip_keys, JSON_SKIP_KEYS) self.assertEqual(file.ensure_ascii, JSON_ENSURE_ASCII) @@ -63,6 +66,7 @@ def test_construction_params(self): file_path = "/root/folder/file" encoding = "ascii" indent = 2 + separators = (",", ":") sort_keys = True skip_keys = True ensure_ascii = True @@ -73,6 +77,7 @@ def test_construction_params(self): file_path, encoding=encoding, indent=indent, + separators=separators, sort_keys=sort_keys, skip_keys=skip_keys, ensure_ascii=ensure_ascii, @@ -83,6 +88,7 @@ def test_construction_params(self): self.assertEqual(file.filename, file_path) self.assertFalse(file.binary) self.assertEqual(file.indent, indent) + self.assertEqual(file.separators, separators) self.assertEqual(file.sort_keys, sort_keys) self.assertEqual(file.skip_keys, skip_keys) self.assertEqual(file.ensure_ascii, ensure_ascii) @@ -271,6 +277,25 @@ def test_write_file_sorted_keys(self, mock_file_open): mock_file.write.assert_called_with(JSON_STRING_SORTED) mock_file.close.assert_called_once() + @patch("builtins.open") + def 
test_write_file_packed(self, mock_file_open): + """Tests a file can be written at once.""" + file_path = "/root/folder/file" + + count = len(JSON_STRING) + mock_file = Mock() + mock_file.write = Mock(return_value=count) + mock_file.close = Mock() + mock_file_open.return_value = mock_file + + file = JSONFile(file_path, indent=None, separators=(",", ":")) + + self.assertEqual(file.write_file(JSON_DATA), count) + + mock_file_open.assert_called_once() + mock_file.write.assert_called_with(JSON_STRING_PACKED) + mock_file.close.assert_called_once() + @patch("builtins.open") def test_read(self, mock_file_open): """Tests a file can be read.""" From e8bc3365285ff183218eebb227e64e86a34e2152 Mon Sep 17 00:00:00 2001 From: jsconan Date: Sun, 11 Feb 2024 17:49:11 +0100 Subject: [PATCH 08/35] chore: update the changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e7b13f..f03869b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- Add options to JSONFile implementation (sort_keys, skip_keys, ensure_ascii, separators, strict). + ### Fixed - Fix the link to the documentation in the readme. 
From 543152526bff06646620b47678656195ab8fec49 Mon Sep 17 00:00:00 2001 From: jsconan Date: Sun, 11 Feb 2024 17:50:09 +0100 Subject: [PATCH 09/35] doc: update the documentation --- docs/toolbox.files.json_file.md | 48 ++++++++++++++++++++++++++++----- docs/toolbox.files.md | 5 ++++ 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/docs/toolbox.files.json_file.md b/docs/toolbox.files.json_file.md index 7f75082..8d1e219 100644 --- a/docs/toolbox.files.json_file.md +++ b/docs/toolbox.files.json_file.md @@ -46,15 +46,25 @@ with file: --------------- - **JSON_ENCODING** - **JSON_INDENT** +- **JSON_SEPARATORS** +- **JSON_SORT_KEYS** +- **JSON_SKIP_KEYS** +- **JSON_ENSURE_ASCII** +- **JSON_STRICT** --- - + ## function `read_json_file` ```python -read_json_file(filename: str, encoding: str = 'utf-8', **kwargs) → Any +read_json_file( + filename: str, + encoding: str = 'utf-8', + strict: bool = True, + **kwargs +) → Any ``` Reads a JSON content from a file. @@ -65,6 +75,7 @@ Reads a JSON content from a file. - `filename` (str): The path to the file to read. - `encoding` (str, optional): The file encoding. Defaults to JSON_ENCODING. + - `strict` (bool, optional): Whether or not to forbid control chars. Defaults to JSON_STRICT. @@ -91,7 +102,7 @@ json_data = read_json_file('path/to/file', encoding='UTF-8') --- - + ## function `write_json_file` @@ -101,6 +112,10 @@ write_json_file( data: Any, encoding: str = 'utf-8', indent: int = 4, + separators: tuple = None, + sort_keys: bool = False, + skip_keys: bool = False, + ensure_ascii: bool = True, **kwargs ) → int ``` @@ -115,6 +130,10 @@ Writes a JSON content to a file. - `data` (Any): The content to write to the file. - `encoding` (str, optional): The file encoding. Defaults to JSON_ENCODING. - `indent` (int, optional): The line indent. Defaults to JSON_INDENT. + - `separators` (tuple, optional): The separators for key/values, a.k.a `(', ', ': ')`. Defaults to JSON_SEPARATORS. 
+ - `sort_keys` (bool, optional): Whether or not to sort the keys. Defaults to JSON_SORT_KEYS. + - `skip_keys` (bool, optional): Whether or not to skip the keys not having an allowed type. Defaults to JSON_SKIP_KEYS. + - `ensure_ascii` (bool, optional): Whether or not to escape non-ascii chars. Defaults to JSON_ENSURE_ASCII. @@ -146,7 +165,7 @@ write_json_file('path/to/file', json_data, encoding='UTF-8', indent=2) --- - + ## class `JSONFile` Offers a simple API for reading and writing JSON files. @@ -163,6 +182,11 @@ The read API reads all the content at once, and so do the write API too. - `binary` (bool): The type of file, say text. It must always be False. - `encoding` (str, optional): The file encoding. - `indent` (int, optional): The line indent. + - `separators` (tuple, optional): The separators for key/values, a.k.a `(', ', ': ')`. + - `sort_keys` (bool, optional): Whether or not to sort the keys. + - `skip_keys` (bool, optional): Whether or not to skip the keys not having an allowed type. + - `ensure_ascii` (bool, optional): Whether or not to escape non-ascii chars. + - `strict` (bool, optional): Whether or not to forbid control chars. @@ -187,7 +211,7 @@ with file(create=True): json = file.read_file() ``` - + ### method `__init__` @@ -200,6 +224,11 @@ __init__( write: bool = False, encoding: str = 'utf-8', indent: int = 4, + separators: tuple = None, + sort_keys: bool = False, + skip_keys: bool = False, + ensure_ascii: bool = True, + strict: bool = True, **kwargs ) ``` @@ -217,6 +246,11 @@ Creates a file manager for JSON files. - `write` (bool, optional): Expect to also write to the file. Defaults to False. - `encoding` (str, optional): The file encoding. Defaults to JSON_ENCODING. - `indent` (int, optional): The line indent. Defaults to JSON_INDENT. + - `separators` (tuple, optional): The separators for key/values, a.k.a `(', ', ': ')`. Defaults to JSON_SEPARATORS. + - `sort_keys` (bool, optional): Whether or not to sort the keys. 
Defaults to JSON_SORT_KEYS. + - `skip_keys` (bool, optional): Whether or not to skip the keys not having an allowed type. Defaults to JSON_SKIP_KEYS. + - `ensure_ascii` (bool, optional): Whether or not to escape non-ascii chars. Defaults to JSON_ENSURE_ASCII. + - `strict` (bool, optional): Whether or not to forbid control chars. Defaults to JSON_STRICT. @@ -424,7 +458,7 @@ size = file.size --- - + ### method `read` @@ -464,7 +498,7 @@ with file: --- - + ### method `write` diff --git a/docs/toolbox.files.md b/docs/toolbox.files.md index dce1f8f..2f7d14f 100644 --- a/docs/toolbox.files.md +++ b/docs/toolbox.files.md @@ -76,7 +76,12 @@ csv_data = file.read_zip_csv(data) - **CSV_DIALECT** - **CSV_ENCODING** - **JSON_ENCODING** +- **JSON_ENSURE_ASCII** - **JSON_INDENT** +- **JSON_SEPARATORS** +- **JSON_SKIP_KEYS** +- **JSON_SORT_KEYS** +- **JSON_STRICT** From 4ba50c5d83d444c989306323b20abaccc5528e11 Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 14 Feb 2024 21:12:56 +0100 Subject: [PATCH 10/35] feat: set the default CSV dialect to 'excel' to reflect the default value from the Python library --- src/cerbernetix/toolbox/files/csv_file.py | 3 ++- tests/files/test_csv_file.py | 13 +++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/cerbernetix/toolbox/files/csv_file.py b/src/cerbernetix/toolbox/files/csv_file.py index e1ad18e..81e4f2c 100644 --- a/src/cerbernetix/toolbox/files/csv_file.py +++ b/src/cerbernetix/toolbox/files/csv_file.py @@ -39,6 +39,7 @@ first = file.read() ``` """ + from __future__ import annotations import csv @@ -52,7 +53,7 @@ CSV_ENCODING = "utf-8" # The default CSV dialect -CSV_DIALECT = "unix" +CSV_DIALECT = "excel" # The amount of bytes to read for auto-detecting the CSV dialect CSV_SAMPLE_SIZE = 1024 diff --git a/tests/files/test_csv_file.py b/tests/files/test_csv_file.py index ad9bd67..82c908c 100644 --- a/tests/files/test_csv_file.py +++ b/tests/files/test_csv_file.py @@ -1,4 +1,5 @@ """Test the class for reading and 
writing CSV files.""" + import unittest import zipfile from typing import Iterator @@ -25,14 +26,14 @@ ["Jane", "Doe", "20", "Paris"], ] CSV_LINES_STRING = [ - '"first_name","last_name","age","city"\n', - '"John","Smith","18","London"\n', - '"Jane","Doe","20","Paris"\n', + "first_name,last_name,age,city\r\n", + "John,Smith,18,London\r\n", + "Jane,Doe,20,Paris\r\n", ] CSV_LINES_REDUCED = [ - '"first_name","last_name"\n', - '"John","Smith"\n', - '"Jane","Doe"\n', + "first_name,last_name\r\n", + "John,Smith\r\n", + "Jane,Doe\r\n", ] CSV_STRING = "".join(CSV_LINES_STRING) From ce514248ef1e29c6825dbebf8ede25dad3407c7c Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 14 Feb 2024 21:48:06 +0100 Subject: [PATCH 11/35] feat: set the default CSV dialect to 'auto' when reading and to 'excel' when writing --- src/cerbernetix/toolbox/files/__init__.py | 1 + src/cerbernetix/toolbox/files/csv_file.py | 21 ++++++++++-------- tests/files/test_csv_file.py | 27 +++++++++++++++++------ 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/src/cerbernetix/toolbox/files/__init__.py b/src/cerbernetix/toolbox/files/__init__.py index 9be61a7..e98bc30 100644 --- a/src/cerbernetix/toolbox/files/__init__.py +++ b/src/cerbernetix/toolbox/files/__init__.py @@ -66,6 +66,7 @@ """ from cerbernetix.toolbox.files.csv_file import ( + CSV_AUTO, CSV_DIALECT, CSV_ENCODING, CSVFile, diff --git a/src/cerbernetix/toolbox/files/csv_file.py b/src/cerbernetix/toolbox/files/csv_file.py index 81e4f2c..3adb4d4 100644 --- a/src/cerbernetix/toolbox/files/csv_file.py +++ b/src/cerbernetix/toolbox/files/csv_file.py @@ -55,6 +55,9 @@ # The default CSV dialect CSV_DIALECT = "excel" +# The value for auto-detecting the CSV dialect +CSV_AUTO = "auto" + # The amount of bytes to read for auto-detecting the CSV dialect CSV_SAMPLE_SIZE = 1024 @@ -140,7 +143,7 @@ def __init__( read: bool = False, write: bool = False, encoding: str = CSV_ENCODING, - dialect: str = CSV_DIALECT, + dialect: str = CSV_AUTO, **kwargs, ): 
r"""Creates a file manager for CSV files. @@ -158,7 +161,7 @@ def __init__( encoding (str, optional): The file encoding. Defaults to CSV_ENCODING. dialect (str, optional): The CSV dialect to use. If 'auto' is given, the reader will try detecting the CSV dialect by reading a sample at the head of the file. - Defaults to CSV_DIALECT. + Defaults to CSV_AUTO for reading or to CSV_DIALECT for writing. delimiter (str, optional): A one-character string used to separate fields. Defaults to ",". doublequote (bool, optional): Controls how instances of quotechar appearing inside a @@ -401,7 +404,7 @@ def read(self) -> dict | list: reader = csv.DictReader dialect = self.dialect - if dialect == "auto": + if dialect == CSV_AUTO: dialect = csv.Sniffer().sniff(self._file.read(CSV_SAMPLE_SIZE)) self._file.seek(0) @@ -462,7 +465,7 @@ def write(self, data: dict | list) -> int: writer = csv.writer dialect = self.dialect - if dialect == "auto": + if dialect == CSV_AUTO: dialect = CSV_DIALECT self._writer = writer(self._file, dialect=dialect, **kwargs) @@ -476,7 +479,7 @@ def write(self, data: dict | list) -> int: def read_csv_file( filename: str, encoding: str = CSV_ENCODING, - dialect: str = CSV_DIALECT, + dialect: str = CSV_AUTO, iterator: bool = False, **kwargs, ) -> Iterable[dict | list]: @@ -490,7 +493,7 @@ def read_csv_file( encoding (str, optional): The file encoding. Defaults to CSV_ENCODING. dialect (str, optional): The CSV dialect to use. If 'auto' is given, the reader will try detecting the CSV dialect by reading a sample at the head of the file. - Defaults to CSV_DIALECT. + Defaults to CSV_AUTO. iterator (bool, optional): When True, the function will return an iterator instead of a list. Defaults to False. delimiter (str, optional): A one-character string used to separate fields. 
@@ -623,7 +626,7 @@ def read_zip_csv( filename: str = None, encoding: str = CSV_ENCODING, decoding_errors: str = "ignore", - dialect: str = CSV_DIALECT, + dialect: str = CSV_AUTO, iterator: bool = False, **kwargs, ) -> Iterable[dict | list]: @@ -643,7 +646,7 @@ def read_zip_csv( Defaults to "ignore". dialect (str, optional): The CSV dialect to use. If 'auto' is given, the reader will try detecting the CSV dialect by reading a sample at the head of the file. - Defaults to CSV_DIALECT. + Defaults to CSV_AUTO. iterator (bool, optional): When True, the function will return an iterator instead of a list. Defaults to False. delimiter (str, optional): A one-character string used to separate fields. @@ -705,7 +708,7 @@ def read_zip_csv( else: reader_factory = csv.DictReader - if dialect == "auto": + if dialect == CSV_AUTO: dialect = csv.Sniffer().sniff(text[:CSV_SAMPLE_SIZE]) lines = re.split(r"[\r\n]+", text.strip("\r\n")) diff --git a/tests/files/test_csv_file.py b/tests/files/test_csv_file.py index 82c908c..498c3c7 100644 --- a/tests/files/test_csv_file.py +++ b/tests/files/test_csv_file.py @@ -6,6 +6,7 @@ from unittest.mock import MagicMock, Mock, patch from cerbernetix.toolbox.files import ( + CSV_AUTO, CSV_DIALECT, CSV_ENCODING, CSVFile, @@ -51,7 +52,7 @@ def test_construction_default(self): self.assertEqual(file.filename, file_path) self.assertFalse(file.binary) - self.assertEqual(file.dialect, CSV_DIALECT) + self.assertEqual(file.dialect, CSV_AUTO) self.assertEqual(file.encoding, CSV_ENCODING) self.assertIsNone(file._file) self.assertEqual(file._open_args, {"newline": ""}) @@ -231,7 +232,8 @@ def test_close_auto(self, mock_file_open): CSV_LINES_HEADLESS, ], ["list", {"fieldnames": False}, CSV_LINES_STRING, CSV_LINES_LIST], - ["auto", {"dialect": "auto"}, CSV_LINES_STRING, CSV_LINES_DICT], + ["auto", {"dialect": CSV_AUTO}, CSV_LINES_STRING, CSV_LINES_DICT], + ["dialect", {"dialect": CSV_DIALECT}, CSV_LINES_STRING, CSV_LINES_DICT], ] ) def test_read_file(self, _, 
params, data, expected): @@ -296,7 +298,8 @@ def test_read_file_iterator(self, mock_file_open): CSV_LINES_LIST[1:], "".join(CSV_LINES_STRING[1:]), ], - ["auto", {"dialect": "auto"}, CSV_LINES_DICT, CSV_STRING], + ["auto", {"dialect": CSV_AUTO}, CSV_LINES_DICT, CSV_STRING], + ["dialect", {"dialect": CSV_DIALECT}, CSV_LINES_DICT, CSV_STRING], ] ) def test_write_file(self, _, params, data, expected): @@ -326,17 +329,24 @@ def write(line): mock_file.write.assert_called() mock_file.close.assert_called_once() + @test_cases( + [ + [CSV_AUTO], + [CSV_DIALECT], + ] + ) @patch("builtins.open") - def test_read(self, mock_file_open): + def test_read(self, dialect, mock_file_open): """Tests a file can be read line by line.""" file_path = "/root/folder/file" mock_file = MagicMock() mock_file.close = Mock() mock_file.__iter__.return_value = CSV_LINES_STRING + mock_file.read.return_value = CSV_STRING mock_file_open.return_value = mock_file - file = CSVFile(file_path) + file = CSVFile(file_path, dialect=dialect) self.assertRaises(ValueError, file.read) @@ -427,6 +437,7 @@ def test_iterator(self, mock_file_open): mock_file = MagicMock() mock_file.close = Mock() mock_file.__iter__.return_value = CSV_LINES_STRING + mock_file.read.return_value = CSV_STRING mock_file_open.return_value = mock_file file = CSVFile(file_path) @@ -457,6 +468,7 @@ def test_read_csv_file(self, mock_file_open): mock_file = MagicMock() mock_file.close = Mock() mock_file.__iter__.return_value = CSV_LINES_STRING + mock_file.read.return_value = CSV_STRING mock_file_open.return_value = mock_file result = read_csv_file(file_path) @@ -474,6 +486,7 @@ def test_read_csv_file_iterator(self, mock_file_open): mock_file = MagicMock() mock_file.close = Mock() mock_file.__iter__.return_value = CSV_LINES_STRING + mock_file.read.return_value = CSV_STRING mock_file_open.return_value = mock_file result = read_csv_file(file_path, iterator=True) @@ -539,8 +552,8 @@ def write(line): CSV_LINES_LIST, ], [ - "dialect auto", - 
{"dialect": "auto"}, + "dialect", + {"dialect": CSV_DIALECT}, "FOO.CSV", CSV_STRING, CSV_LINES_DICT, From 03dff9bceaffcea969bbd82145290a17df4d089b Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 14 Feb 2024 21:50:59 +0100 Subject: [PATCH 12/35] chore: update the changelog --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f03869b..70a10c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- Add options to JSONFile implementation (sort_keys, skip_keys, ensure_ascii, separators, strict). +- Add options to JSONFile implementation (`sort_keys`, `skip_keys`, `ensure_ascii`, `separators`, `strict`). +- Set the default CSV dialect to `'excel'` when writing (this reflects the default value from the Python library). +- Set the default CSV dialect to `'auto'` when reading (the dialect will be sniffed from the first few rows). 
### Fixed From 5acda8ab589f4befbca8a03b61fcd5e8d35742ac Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 14 Feb 2024 21:51:39 +0100 Subject: [PATCH 13/35] doc: update the documentation --- docs/toolbox.files.csv_file.md | 35 +++++++++++++++++----------------- docs/toolbox.files.md | 1 + 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/docs/toolbox.files.csv_file.md b/docs/toolbox.files.csv_file.md index d239688..e827860 100644 --- a/docs/toolbox.files.csv_file.md +++ b/docs/toolbox.files.csv_file.md @@ -50,6 +50,7 @@ with file: --------------- - **CSV_ENCODING** - **CSV_DIALECT** +- **CSV_AUTO** - **CSV_SAMPLE_SIZE** - **CSV_READER_PARAMS** - **CSV_WRITER_PARAMS** @@ -57,7 +58,7 @@ with file: --- - + ## function `read_csv_file` @@ -65,7 +66,7 @@ with file: read_csv_file( filename: 'str', encoding: 'str' = 'utf-8', - dialect: 'str' = 'unix', + dialect: 'str' = 'auto', iterator: 'bool' = False, **kwargs ) → Iterable[dict | list] @@ -81,7 +82,7 @@ The returned value can be either a list (default) or an iterator (when the itera - `filename` (str): The path to the file to read. - `encoding` (str, optional): The file encoding. Defaults to CSV_ENCODING. - - `dialect` (str, optional): The CSV dialect to use. If 'auto' is given, the reader will try detecting the CSV dialect by reading a sample at the head of the file. Defaults to CSV_DIALECT. + - `dialect` (str, optional): The CSV dialect to use. If 'auto' is given, the reader will try detecting the CSV dialect by reading a sample at the head of the file. Defaults to CSV_AUTO. - `iterator` (bool, optional): When True, the function will return an iterator instead of a list. Defaults to False. - `delimiter` (str, optional): A one-character string used to separate fields. Defaults to ','. - `doublequote` (bool, optional): Controls how instances of quotechar appearing inside a field should themselves be quoted. When True, the character is doubled. When False, the escapechar is used as a prefix to the quotechar. 
Defaults to True. @@ -123,7 +124,7 @@ for row in read_csv_file('path/to/file', iterator=True): --- - + ## function `write_csv_file` @@ -132,7 +133,7 @@ write_csv_file( filename: 'str', data: 'Iterable[dict | list]', encoding: 'str' = 'utf-8', - dialect: 'str' = 'unix', + dialect: 'str' = 'excel', **kwargs ) → int ``` @@ -189,7 +190,7 @@ write_csv_file('path/to/file', csv_data, encoding='UTF-8', dialect='excel') --- - + ## function `read_zip_csv` @@ -199,7 +200,7 @@ read_zip_csv( filename: 'str' = None, encoding: 'str' = 'utf-8', decoding_errors: 'str' = 'ignore', - dialect: 'str' = 'unix', + dialect: 'str' = 'auto', iterator: 'bool' = False, **kwargs ) → Iterable[dict | list] @@ -217,7 +218,7 @@ The returned value can be either a list (default) or an iterator (when the itera - `filename` (str, optional): The name of the file to extract from the zip If omitted, the first file having a '.csv' extension will be selected. Defaults to None. - `encoding` (str, optional): The file encoding. Defaults to CSV_ENCODING. - `decoding_errors` (str, optional): Controls how decoding errors are handled. If 'strict', a UnicodeError exception is raised. Other possible values are 'ignore', 'replace', and any other name registered via codecs.register_error(). See Error Handlers for details. Defaults to "ignore". - - `dialect` (str, optional): The CSV dialect to use. If 'auto' is given, the reader will try detecting the CSV dialect by reading a sample at the head of the file. Defaults to CSV_DIALECT. + - `dialect` (str, optional): The CSV dialect to use. If 'auto' is given, the reader will try detecting the CSV dialect by reading a sample at the head of the file. Defaults to CSV_AUTO. - `iterator` (bool, optional): When True, the function will return an iterator instead of a list. Defaults to False. - `delimiter` (str, optional): A one-character string used to separate fields. Defaults to ','. 
- `doublequote` (bool, optional): Controls how instances of quotechar appearing inside a field should themselves be quoted. When True, the character is doubled. When False, the escapechar is used as a prefix to the quotechar. Defaults to True. @@ -265,7 +266,7 @@ with open('path/to/file.zip', 'rb') as file: --- - + ## class `CSVFile` Offers a simple API for reading and writing CSV files. @@ -310,7 +311,7 @@ with file(create=True): csv = file.read_file() ``` - + ### method `__init__` @@ -322,7 +323,7 @@ __init__( read: 'bool' = False, write: 'bool' = False, encoding: 'str' = 'utf-8', - dialect: 'str' = 'unix', + dialect: 'str' = 'auto', **kwargs ) ``` @@ -339,7 +340,7 @@ Creates a file manager for CSV files. - `read` (bool, optional): Expect to also read the file. Defaults to False. - `write` (bool, optional): Expect to also write to the file. Defaults to False. - `encoding` (str, optional): The file encoding. Defaults to CSV_ENCODING. - - `dialect` (str, optional): The CSV dialect to use. If 'auto' is given, the reader will try detecting the CSV dialect by reading a sample at the head of the file. Defaults to CSV_DIALECT. + - `dialect` (str, optional): The CSV dialect to use. If 'auto' is given, the reader will try detecting the CSV dialect by reading a sample at the head of the file. Defaults to CSV_AUTO for reading or to CSV_DIALECT for writing. - `delimiter` (str, optional): A one-character string used to separate fields. Defaults to ",". - `doublequote` (bool, optional): Controls how instances of quotechar appearing inside a field should themselves be quoted. When True, the character is doubled. When False, the escapechar is used as a prefix to the quotechar. Defaults to True. - `escapechar` (str, optional): A one-character string used by the writer to escape the delimiter if quoting is set to QUOTE_NONE and the quotechar if doublequote is False. On reading, the escapechar removes any special meaning from the following character. 
Defaults to None, which disables escaping. @@ -566,7 +567,7 @@ size = file.size --- - + ### method `close` @@ -604,7 +605,7 @@ file.close() --- - + ### method `read` @@ -648,7 +649,7 @@ csv_data = [row for row in file] --- - + ### method `read_file` @@ -699,7 +700,7 @@ for row in file.read_file(iterator=True): --- - + ### method `write` @@ -746,7 +747,7 @@ with file(create=True): --- - + ### method `write_file` diff --git a/docs/toolbox.files.md b/docs/toolbox.files.md index 2f7d14f..a26f814 100644 --- a/docs/toolbox.files.md +++ b/docs/toolbox.files.md @@ -73,6 +73,7 @@ csv_data = file.read_zip_csv(data) **Global Variables** --------------- +- **CSV_AUTO** - **CSV_DIALECT** - **CSV_ENCODING** - **JSON_ENCODING** From cd216614cdde12390c118b8f52eb481ffa95de76 Mon Sep 17 00:00:00 2001 From: jsconan Date: Sun, 17 Mar 2024 16:48:30 +0100 Subject: [PATCH 14/35] feat: add a helper for computing combinations and possibly extract the values from a sparse list --- src/cerbernetix/toolbox/math/__init__.py | 21 ++++- src/cerbernetix/toolbox/math/combination.py | 78 +++++++++++++++++- tests/math/test_combination.py | 91 ++++++++++++++++++++- 3 files changed, 182 insertions(+), 8 deletions(-) diff --git a/src/cerbernetix/toolbox/math/__init__.py b/src/cerbernetix/toolbox/math/__init__.py index c44772d..1f65d1f 100644 --- a/src/cerbernetix/toolbox/math/__init__.py +++ b/src/cerbernetix/toolbox/math/__init__.py @@ -4,16 +4,31 @@ - `get_combination_rank(combination, offset)`: Gets the rank of a combination. - `get_combination_from_rank(rank, length, offset)`: Gets the combination corresponding to a particular rank. +- `get_combinations(values, length, offset, columns)`: Yields lists of combined values according to +the combinations defined by the lengths. 
Examples: ```python -from cerbernetix.toolbox.math import get_combination_rank, get_combination_from_rank +from cerbernetix.toolbox.math import ( + get_combination_rank, + get_combination_from_rank, + get_combinations, +) # Get the rank of a combination of 3 numbers print(get_combination_rank([1, 3, 5])) # Get the combination of 3 numbers ranked at position 5 -print(list(get_combination_from_rank(5, 3))) +print(get_combination_from_rank(5, 3)) + +# Get the combinations of 3 numbers from the list +values = [1, 2, 4, 8, 16] +print(list(get_combinations(values, 3))) ``` """ -from cerbernetix.toolbox.math.combination import get_combination_from_rank, get_combination_rank + +from cerbernetix.toolbox.math.combination import ( + get_combination_from_rank, + get_combination_rank, + get_combinations, +) diff --git a/src/cerbernetix/toolbox/math/combination.py b/src/cerbernetix/toolbox/math/combination.py index 2dfc304..5cc0ade 100644 --- a/src/cerbernetix/toolbox/math/combination.py +++ b/src/cerbernetix/toolbox/math/combination.py @@ -2,17 +2,26 @@ Examples: ```python -from cerbernetix.toolbox.math import get_combination_rank, get_combination_from_rank +from cerbernetix.toolbox.math import ( + get_combination_rank, + get_combination_from_rank, + get_combinations, +) # Get the rank of a combination of 3 numbers print(get_combination_rank([1, 3, 5])) # Get the combination of 3 numbers ranked at position 5 -print(list(get_combination_from_rank(5, 3))) +print(get_combination_from_rank(5, 3)) + +# Get the combinations of 3 numbers from the list +values = [1, 2, 4, 8, 16] +print(list(get_combinations(values, 3))) ``` """ + from math import comb -from typing import Iterable +from typing import Iterable, Iterator def get_combination_rank(combination: Iterable[int], offset: int = 0) -> int: @@ -77,7 +86,7 @@ def get_combination_from_rank(rank: int, length: int = 2, offset: int = 0) -> li from cerbernetix.toolbox.math import get_combination_from_rank # Get the combination of 3 
numbers ranked at position 5 - print(list(get_combination_from_rank(5, 3))) + print(get_combination_from_rank(5, 3)) ``` """ if rank < 0: @@ -115,3 +124,64 @@ def get_combination_from_rank(rank: int, length: int = 2, offset: int = 0) -> li combination[0] = rank - binomial + offset return combination + + +def get_combinations( + values: int | list | tuple | dict, + length: int = 2, + offset: int = 0, + columns: list | tuple = None, +) -> Iterator[list]: + """Yields lists of combined values according to the combinations defined by the lengths. + + Taking a list of values and the length of a combination, it yields each combination of length + elements taken from the values. + + Note: Beware, the number of possible combinations grows fast with the lengths. + For example, 3 out of 5 gives 10 possible combinations, but 3 out of 50 gives 19600... + + Args: + values (int | list | tuple | dict): The list of values from which to build the list of + combinations. It can be either the length of a range of integers from 0, or a list of + sparse values. + length (int, optional): The length of each combination. Defaults to 2. + offset (int, optional): An offset to add to the values if they must not start at 0. + Defaults to 0. + columns (list | tuple, optional): A mapping list for retrieving the values in order from + the values. Defaults to None. + + Yields: + Iterator[list]: A list of combined values by the given length.
+ + Examples: + ```python + from cerbernetix.toolbox.math import get_combinations + + # Get the combinations of 3 numbers from the list + values = [1, 2, 4, 8, 16] + print(list(get_combinations(values, 3))) + # [[1, 2, 4], + # [1, 2, 8], + # [1, 4, 8], + # [2, 4, 8], + # [1, 2, 16], + # [1, 4, 16], + # [2, 4, 16], + # [1, 8, 16], + # [2, 8, 16], + # [4, 8, 16]] + ``` + """ + if isinstance(values, int): + nb_values = values + values = [*range(values)] + else: + nb_values = len(values) + nb_comb = comb(nb_values, length) + + if columns is None: + columns = [*range(nb_values)] + + for rank in range(nb_comb): + combination = get_combination_from_rank(rank, length) + yield [values[columns[index]] + offset for index in combination] diff --git a/tests/math/test_combination.py b/tests/math/test_combination.py index 2adcd2c..847e713 100644 --- a/tests/math/test_combination.py +++ b/tests/math/test_combination.py @@ -1,7 +1,13 @@ """Test the set of functions for working with combinations.""" + import unittest +from typing import Iterator -from cerbernetix.toolbox.math import get_combination_from_rank, get_combination_rank +from cerbernetix.toolbox.math import ( + get_combination_from_rank, + get_combination_rank, + get_combinations, +) from cerbernetix.toolbox.testing import test_cases @@ -205,3 +211,86 @@ def test_get_combination_from_rank_error(self): """Test get_combination_from_rank errors.""" self.assertRaises(ValueError, lambda: get_combination_from_rank(-1, 2)) self.assertRaises(ValueError, lambda: get_combination_from_rank(0, -1, 4)) + + def test_get_combinations_int(self): + """Test get_combinations.""" + self.assertIsInstance(get_combinations(5, 3), Iterator) + self.assertEqual(next(get_combinations(5, 3)), [0, 1, 2]) + self.assertEqual( + list(get_combinations(5, 3)), + [ + [0, 1, 2], + [0, 1, 3], + [0, 2, 3], + [1, 2, 3], + [0, 1, 4], + [0, 2, 4], + [1, 2, 4], + [0, 3, 4], + [1, 3, 4], + [2, 3, 4], + ], + ) + + def test_get_combinations_offset(self): + """Test 
get_combinations.""" + self.assertIsInstance(get_combinations(5, 3, offset=1), Iterator) + self.assertEqual(next(get_combinations(5, 3, offset=1)), [1, 2, 3]) + self.assertEqual( + list(get_combinations(5, 3, offset=1)), + [ + [1, 2, 3], + [1, 2, 4], + [1, 3, 4], + [2, 3, 4], + [1, 2, 5], + [1, 3, 5], + [2, 3, 5], + [1, 4, 5], + [2, 4, 5], + [3, 4, 5], + ], + ) + + def test_get_combinations_values(self): + """Test get_combinations.""" + values = [1, 2, 4, 8, 16] + self.assertIsInstance(get_combinations(values, 3), Iterator) + self.assertEqual(next(get_combinations(values, 3)), [1, 2, 4]) + self.assertEqual( + list(get_combinations(values, 3)), + [ + [1, 2, 4], + [1, 2, 8], + [1, 4, 8], + [2, 4, 8], + [1, 2, 16], + [1, 4, 16], + [2, 4, 16], + [1, 8, 16], + [2, 8, 16], + [4, 8, 16], + ], + ) + + def test_get_combinations_columns(self): + """Test get_combinations with a dictionary and a list of columns.""" + values = {"1": 1, "2": 2, "4": 4, "8": 8, "16": 16} + columns = ["1", "2", "4", "8", "16"] + self.assertIsInstance(get_combinations(values, 3, columns=columns), Iterator) + self.assertEqual(next(get_combinations(values, 3, columns=columns)), [1, 2, 4]) + self.assertEqual( + list(get_combinations(values, 3, columns=columns)), + [ + [1, 2, 4], + [1, 2, 8], + [1, 4, 8], + [2, 4, 8], + [1, 2, 16], + [1, 4, 16], + [2, 4, 16], + [1, 8, 16], + [2, 8, 16], + [4, 8, 16], + ], + ) From b1d78e77d698117c854f8726e363698d05837796 Mon Sep 17 00:00:00 2001 From: jsconan Date: Sun, 17 Mar 2024 16:49:02 +0100 Subject: [PATCH 15/35] doc: update the documentation --- docs/README.md | 1 + docs/toolbox.math.combination.md | 76 +++++++++++++++++++++++++++++--- docs/toolbox.math.md | 13 +++++- 3 files changed, 83 insertions(+), 7 deletions(-) diff --git a/docs/README.md b/docs/README.md index f42f347..274602a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -73,6 +73,7 @@ - [`config.setup_file_logging`](./toolbox.logging.config.md#function-setup_file_logging): Setup the application 
log to a file logger. - [`combination.get_combination_from_rank`](./toolbox.math.combination.md#function-get_combination_from_rank): Gets the combination corresponding to a particular rank. - [`combination.get_combination_rank`](./toolbox.math.combination.md#function-get_combination_rank): Gets the rank of a combination. +- [`combination.get_combinations`](./toolbox.math.combination.md#function-get_combinations): Yields lists of combined values according to the combinations defined by the lengths. - [`decorators.test_cases`](./toolbox.testing.decorators.md#function-test_cases): Creates a decorator for parametric test cases. diff --git a/docs/toolbox.math.combination.md b/docs/toolbox.math.combination.md index 04592f7..9a7e8a0 100644 --- a/docs/toolbox.math.combination.md +++ b/docs/toolbox.math.combination.md @@ -9,19 +9,27 @@ A set of functions for working with combinations. **Examples:** ```python -from cerbernetix.toolbox.math import get_combination_rank, get_combination_from_rank +from cerbernetix.toolbox.math import ( + get_combination_rank, + get_combination_from_rank, + get_combinations, +) # Get the rank of a combination of 3 numbers print(get_combination_rank([1, 3, 5])) # Get the combination of 3 numbers ranked at position 5 -print(list(get_combination_from_rank(5, 3))) +print(get_combination_from_rank(5, 3)) + +# Get the combinations of 3 numbers from the list +values = [1, 2, 4, 8, 16] +print(list(get_combinations(values, 3))) ``` --- - + ## function `get_combination_rank` @@ -67,7 +75,7 @@ print(get_combination_rank([1, 3, 5])) --- - + ## function `get_combination_from_rank` @@ -111,7 +119,65 @@ The rank must start at 0. 
from cerbernetix.toolbox.math import get_combination_from_rank # Get the combination of 3 numbers ranked at position 5 -print(list(get_combination_from_rank(5, 3))) +print(get_combination_from_rank(5, 3)) +``` + + +--- + + + +## function `get_combinations` + +```python +get_combinations( + values: int | list | tuple | dict, + length: int = 2, + offset: int = 0, + columns: list | tuple = None +) → Iterator[list] +``` + +Yields lists of combined values according to the combinations defined by the lengths. + +Taking a list of values and the length of a combination, it yields each combination of length elements taken from the values. + +Note: Beware, the number of possible combinations grows fast with the lengths. For example, 3 out of 5 gives 10 possible combinations, but 3 out of 50 gives 19600... + + + +**Args:** + + - `values` (int | list | tuple | dict): The list of values from which build the list of combinations. It can be either the length of a range of integers from 0, or a list of sparse values. + - `length` (int, optional): The length of each combination. Defaults to 2. + - `offset` (int, optional): An offset to add to the values if they must not start at 0. Defaults to 0. + - `columns` (list | tuple, optional): A mapping list for retrieving the values in order from the values. Defaults to None. + + + +**Yields:** + + - `Iterator[list]`: A list of combined values by the given length. + + + +**Examples:** + ```python +from cerbernetix.toolbox.math import get_combinations + +# Get the combinations of 3 numbers from the list +values = [1, 2, 4, 8, 16] +print(list(get_combinations(values, 3))) +# [[1, 2, 4], +# [1, 2, 8], +# [1, 4, 8], +# [2, 4, 8], +# [1, 2, 16], +# [1, 4, 16], +# [2, 4, 16], +# [1, 8, 16], +# [2, 8, 16], +# [4, 8, 16]] ``` diff --git a/docs/toolbox.math.md b/docs/toolbox.math.md index 6335ba9..5a45c74 100644 --- a/docs/toolbox.math.md +++ b/docs/toolbox.math.md @@ -8,18 +8,27 @@ A collection of Math related tools. 
It contains: - `get_combination_rank(combination, offset)`: Gets the rank of a combination. - `get_combination_from_rank(rank, length, offset)`: Gets the combination corresponding to a particular rank. +- `get_combinations(values, length, offset, columns)`: Yields lists of combined values according to the combinations defined by the lengths. **Examples:** ```python -from cerbernetix.toolbox.math import get_combination_rank, get_combination_from_rank +from cerbernetix.toolbox.math import ( + get_combination_rank, + get_combination_from_rank, + get_combinations, +) # Get the rank of a combination of 3 numbers print(get_combination_rank([1, 3, 5])) # Get the combination of 3 numbers ranked at position 5 -print(list(get_combination_from_rank(5, 3))) +print(get_combination_from_rank(5, 3)) + +# Get the combinations of 3 numbers from the list +values = [1, 2, 4, 8, 16] +print(list(get_combinations(values, 3))) ``` From 9c6824cb97256aee636064f464c9391f6d13c25e Mon Sep 17 00:00:00 2001 From: jsconan Date: Sun, 17 Mar 2024 16:51:04 +0100 Subject: [PATCH 16/35] chore: update the changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70a10c9..6a4e275 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- `get_combinations(values, length, offset, columns)`: Computes combinations given a set of values and a length. + ### Changed - Add options to JSONFile implementation (`sort_keys`, `skip_keys`, `ensure_ascii`, `separators`, `strict`). 
From 72c546936cf469c15bd581ef08a3136775b5db2b Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 3 Apr 2024 21:15:43 +0200 Subject: [PATCH 17/35] feat: add parameters for getting combinations from a particular range of ranks --- src/cerbernetix/toolbox/math/__init__.py | 8 ++- src/cerbernetix/toolbox/math/combination.py | 61 ++++++++++++++++--- tests/math/test_combination.py | 65 +++++++++++++++++++-- 3 files changed, 118 insertions(+), 16 deletions(-) diff --git a/src/cerbernetix/toolbox/math/__init__.py b/src/cerbernetix/toolbox/math/__init__.py index 1f65d1f..201b413 100644 --- a/src/cerbernetix/toolbox/math/__init__.py +++ b/src/cerbernetix/toolbox/math/__init__.py @@ -4,8 +4,8 @@ - `get_combination_rank(combination, offset)`: Gets the rank of a combination. - `get_combination_from_rank(rank, length, offset)`: Gets the combination corresponding to a particular rank. -- `get_combinations(values, length, offset, columns)`: Yields lists of combined values according to -the combinations defined by the lengths. +- `get_combinations(values, length, offset, indexes, start, stop, step)`: Yields lists of combined +values according to the combinations defined by the lengths. 
Examples: ```python @@ -24,6 +24,10 @@ # Get the combinations of 3 numbers from the list values = [1, 2, 4, 8, 16] print(list(get_combinations(values, 3))) + +# Get the combinations of 3 numbers out of 50 from rank 200 to 300 +values = [1, 2, 4, 8, 16] +print(list(get_combinations(50, 3, start=200, stop=300))) ``` """ diff --git a/src/cerbernetix/toolbox/math/combination.py b/src/cerbernetix/toolbox/math/combination.py index 5cc0ade..5797a95 100644 --- a/src/cerbernetix/toolbox/math/combination.py +++ b/src/cerbernetix/toolbox/math/combination.py @@ -17,12 +17,18 @@ # Get the combinations of 3 numbers from the list values = [1, 2, 4, 8, 16] print(list(get_combinations(values, 3))) + +# Get the combinations of 3 numbers out of 50 from rank 200 to 300 +values = [1, 2, 4, 8, 16] +print(list(get_combinations(50, 3, start=200, stop=300))) ``` """ from math import comb from typing import Iterable, Iterator +from cerbernetix.toolbox.data.mappers import passthrough + def get_combination_rank(combination: Iterable[int], offset: int = 0) -> int: """Gets the rank of a combination. @@ -129,8 +135,11 @@ def get_combination_from_rank(rank: int, length: int = 2, offset: int = 0) -> li def get_combinations( values: int | list | tuple | dict, length: int = 2, + start: int = 0, + stop: int = None, + step: int = 1, offset: int = 0, - columns: list | tuple = None, + indexes: list | tuple = None, ) -> Iterator[list]: """Yields lists of combined values according to the combinations defined by the lengths. @@ -145,10 +154,16 @@ def get_combinations( combinations. It can be either the length of a range of integers from 0, or a list of sparse values. length (int, optional): The length of each combination. Defaults to 2. + start (int, optional): The rank of the first combination to generate. Defaults to 0. + stop (int, optional): The rank at which to stop the generation (excluded). If omitted, the + total number of combinations is taken. Defaults to None. 
+ step (int, optional): The step between ranks. If start is higher than stop, the step is set + to a negative value. Defaults to 1. offset (int, optional): An offset to add to the values if they must not start at 0. Defaults to 0. - columns (list | tuple, optional): A mapping list for retrieving the values in order from - the values. Defaults to None. + indexes (list | tuple, optional): A list of indexes for retrieving the values by position. + Useful if the values are not indexed by sequential numbers or with a contiguous set like a + dictionary or a spare array. Defaults to None. Yields: Iterator[list]: A list of combined values by the given length. @@ -170,18 +185,48 @@ def get_combinations( # [1, 8, 16], # [2, 8, 16], # [4, 8, 16]] + + # Get the combinations of 3 numbers from the list from rank 4 to 8 + values = {"1": 1, "2": 2, "4": 4, "8": 8, "16": 16} + indexes = ["1", "2", "4", "8", "16"] + print(list(get_combinations(values, 3, indexes=indexes, start=4, stop=8))) + # [[1, 2, 16], + # [1, 4, 16], + # [2, 4, 16], + # [1, 8, 16]] ``` """ if isinstance(values, int): nb_values = values - values = [*range(values)] + get_value = passthrough else: nb_values = len(values) + get_value = values.__getitem__ + + if indexes is None: + get_index = passthrough + else: + get_index = indexes.__getitem__ + + if nb_values == 0 or length == 0: + return + nb_comb = comb(nb_values, length) - if columns is None: - columns = [*range(nb_values)] + if stop is None or stop > nb_comb: + stop = nb_comb + + if start >= nb_comb: + start = nb_comb - 1 + + if start < 0 or stop < -1: + raise ValueError("A combination range cannot start or stop with a negative value") + + if start > stop: + step = -abs(step) + else: + step = abs(step) - for rank in range(nb_comb): + for rank in range(start, stop, step): combination = get_combination_from_rank(rank, length) - yield [values[columns[index]] + offset for index in combination] + yield [get_value(get_index(position)) + offset for position in 
combination] diff --git a/tests/math/test_combination.py b/tests/math/test_combination.py index 847e713..08a0c2b 100644 --- a/tests/math/test_combination.py +++ b/tests/math/test_combination.py @@ -1,6 +1,7 @@ """Test the set of functions for working with combinations.""" import unittest +from operator import ne from typing import Iterator from cerbernetix.toolbox.math import ( @@ -273,14 +274,14 @@ def test_get_combinations_values(self): ], ) - def test_get_combinations_columns(self): - """Test get_combinations with a dictionary and a list of columns.""" + def test_get_combinations_indexes(self): + """Test get_combinations with a dictionary and a list of indexes.""" values = {"1": 1, "2": 2, "4": 4, "8": 8, "16": 16} - columns = ["1", "2", "4", "8", "16"] - self.assertIsInstance(get_combinations(values, 3, columns=columns), Iterator) - self.assertEqual(next(get_combinations(values, 3, columns=columns)), [1, 2, 4]) + indexes = ["1", "2", "4", "8", "16"] + self.assertIsInstance(get_combinations(values, 3, indexes=indexes), Iterator) + self.assertEqual(next(get_combinations(values, 3, indexes=indexes)), [1, 2, 4]) self.assertEqual( - list(get_combinations(values, 3, columns=columns)), + list(get_combinations(values, 3, indexes=indexes)), [ [1, 2, 4], [1, 2, 8], @@ -294,3 +295,55 @@ def test_get_combinations_columns(self): [4, 8, 16], ], ) + + def test_get_combinations_range(self): + """Test get_combinations with a range.""" + values = [1, 2, 4, 8, 16] + self.assertIsInstance(get_combinations(values, 3, start=2, stop=8, step=2), Iterator) + self.assertEqual( + list(get_combinations(values, 3, start=2, stop=8, step=2)), + [ + [1, 4, 8], + [1, 2, 16], + [2, 4, 16], + ], + ) + self.assertEqual( + list(get_combinations(values, 3, start=8, stop=2, step=2)), + [ + [2, 8, 16], + [2, 4, 16], + [1, 2, 16], + ], + ) + self.assertEqual( + list(get_combinations(values, 3, start=20, stop=-1)), + [ + [4, 8, 16], + [2, 8, 16], + [1, 8, 16], + [2, 4, 16], + [1, 4, 16], + [1, 2, 16], 
+ [2, 4, 8], + [1, 4, 8], + [1, 2, 8], + [1, 2, 4], + ], + ) + self.assertEqual( + list(get_combinations(values, 3, start=20)), + [ + [4, 8, 16], + ], + ) + + def test_get_combinations_empty(self): + """Test get_combinations returns empty.""" + self.assertEqual(list(get_combinations(0, 3)), []) + self.assertEqual(list(get_combinations(5, 0)), []) + + def test_get_combinations_error(self): + """Test get_combinations errors.""" + self.assertRaises(ValueError, lambda: next(get_combinations(5, start=-10))) + self.assertRaises(ValueError, lambda: next(get_combinations(5, stop=-10))) From be8998001aecedf843fbb1919e7f1dfaafa0f041 Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 3 Apr 2024 21:16:34 +0200 Subject: [PATCH 18/35] chore: update the changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a4e275..e8b12ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- `get_combinations(values, length, offset, columns)`: Computes combinations given a set of values and a length. +- `get_combinations(values, length, start, stop, step, offset, indexes)`: Computes combinations given a set of values and a length. 
### Changed From 002ef2fb57462740b4fe4a73d1307e2c30b91f51 Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 3 Apr 2024 21:34:20 +0200 Subject: [PATCH 19/35] doc: update the documentation --- docs/toolbox.math.combination.md | 29 ++++++++++++++++++++++++----- docs/toolbox.math.md | 6 +++++- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/docs/toolbox.math.combination.md b/docs/toolbox.math.combination.md index 9a7e8a0..9e15c2f 100644 --- a/docs/toolbox.math.combination.md +++ b/docs/toolbox.math.combination.md @@ -24,12 +24,16 @@ print(get_combination_from_rank(5, 3)) # Get the combinations of 3 numbers from the list values = [1, 2, 4, 8, 16] print(list(get_combinations(values, 3))) + +# Get the combinations of 3 numbers out of 50 from rank 200 to 500 +values = [1, 2, 4, 8, 16] +print(list(get_combinations(50, 3, start=200, stop=300))) ``` --- - + ## function `get_combination_rank` @@ -75,7 +79,7 @@ print(get_combination_rank([1, 3, 5])) --- - + ## function `get_combination_from_rank` @@ -125,7 +129,7 @@ print(get_combination_from_rank(5, 3)) --- - + ## function `get_combinations` @@ -133,8 +137,11 @@ print(get_combination_from_rank(5, 3)) get_combinations( values: int | list | tuple | dict, length: int = 2, + start: int = 0, + stop: int = None, + step: int = 1, offset: int = 0, - columns: list | tuple = None + indexes: list | tuple = None ) → Iterator[list] ``` @@ -150,8 +157,11 @@ Note: Beware, the number of possible combinations grows fast with the lengths. F - `values` (int | list | tuple | dict): The list of values from which build the list of combinations. It can be either the length of a range of integers from 0, or a list of sparse values. - `length` (int, optional): The length of each combination. Defaults to 2. + - `start` (int, optional): The rank of the first combination to generate. Defaults to 0. + - `stop` (int, optional): The rank of the last combination before what stop the generation. 
If omitted, the maximum number of combinations is taken. Defaults to None. + - `step` (int, optional): The step between ranks. If start is higher than stop, the step is set to a negative value. Defaults to 1. - `offset` (int, optional): An offset to add to the values if they must not start at 0. Defaults to 0. - - `columns` (list | tuple, optional): A mapping list for retrieving the values in order from the values. Defaults to None. + - `indexes` (list | tuple, optional): A list of indexes for retrieving the values by position. Useful if the values are not indexed by sequential numbers or with a contiguous set like a dictionary or a sparse array. Defaults to None. @@ -178,6 +188,15 @@ print(list(get_combinations(values, 3))) # [1, 8, 16], # [2, 8, 16], # [4, 8, 16]] + +# Get the combinations of 3 numbers from the list from rank 4 to 8 +values = {"1": 1, "2": 2, "4": 4, "8": 8, "16": 16} +indexes = ["1", "2", "4", "8", "16"] +print(list(get_combinations(values, 3, indexes=indexes, start=4, stop=8))) +# [[1, 2, 16], +# [1, 4, 16], +# [2, 4, 16], +# [1, 8, 16]] ``` diff --git a/docs/toolbox.math.md b/docs/toolbox.math.md index 5a45c74..d01aa16 100644 --- a/docs/toolbox.math.md +++ b/docs/toolbox.math.md @@ -8,7 +8,7 @@ A collection of Math related tools. It contains: - `get_combination_rank(combination, offset)`: Gets the rank of a combination. - `get_combination_from_rank(rank, length, offset)`: Gets the combination corresponding to a particular rank. -- `get_combinations(values, length, offset, columns)`: Yields lists of combined values according to the combinations defined by the lengths. +- `get_combinations(values, length, offset, indexes, start, stop, step)`: Yields lists of combined values according to the combinations defined by the lengths. 
@@ -29,6 +29,10 @@ print(get_combination_from_rank(5, 3)) # Get the combinations of 3 numbers from the list values = [1, 2, 4, 8, 16] print(list(get_combinations(values, 3))) + +# Get the combinations of 3 numbers out of 50 from rank 200 to 300 +values = [1, 2, 4, 8, 16] +print(list(get_combinations(50, 3, start=200, stop=300))) ``` From cffef151d21802298c5f1f418616e9ffd36cbfc8 Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 3 Apr 2024 21:40:02 +0200 Subject: [PATCH 20/35] doc: fix order of parameters in the doc --- docs/toolbox.math.md | 2 +- src/cerbernetix/toolbox/math/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/toolbox.math.md b/docs/toolbox.math.md index d01aa16..14e2b9f 100644 --- a/docs/toolbox.math.md +++ b/docs/toolbox.math.md @@ -8,7 +8,7 @@ A collection of Math related tools. It contains: - `get_combination_rank(combination, offset)`: Gets the rank of a combination. - `get_combination_from_rank(rank, length, offset)`: Gets the combination corresponding to a particular rank. -- `get_combinations(values, length, offset, indexes, start, stop, step)`: Yields lists of combined values according to the combinations defined by the lengths. +- `get_combinations(values, length, start, stop, step, offset, indexes)`: Yields lists of combined values according to the combinations defined by the lengths. diff --git a/src/cerbernetix/toolbox/math/__init__.py b/src/cerbernetix/toolbox/math/__init__.py index 201b413..793c3cc 100644 --- a/src/cerbernetix/toolbox/math/__init__.py +++ b/src/cerbernetix/toolbox/math/__init__.py @@ -4,7 +4,7 @@ - `get_combination_rank(combination, offset)`: Gets the rank of a combination. - `get_combination_from_rank(rank, length, offset)`: Gets the combination corresponding to a particular rank. 
-- `get_combinations(values, length, offset, indexes, start, stop, step)`: Yields lists of combined +- `get_combinations(values, length, start, stop, step, offset, indexes)`: Yields lists of combined values according to the combinations defined by the lengths. Examples: From 9f866e099dea23b69ed331f42169879ed8925931 Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 3 Apr 2024 21:42:38 +0200 Subject: [PATCH 21/35] fix: inconsistent return value in the log action --- src/cerbernetix/toolbox/logging/log_file.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cerbernetix/toolbox/logging/log_file.py b/src/cerbernetix/toolbox/logging/log_file.py index aab28dd..720241f 100644 --- a/src/cerbernetix/toolbox/logging/log_file.py +++ b/src/cerbernetix/toolbox/logging/log_file.py @@ -23,6 +23,7 @@ logger.error('An error occurred: %s', error) ``` """ + from __future__ import annotations import atexit @@ -290,7 +291,7 @@ def log(self, level: int, message: str, *args, **kwargs) -> LogFile: ``` """ if level < self._level: - return + return self pathname = None lineno = 0 From bc3546b63b6327eef849bba4ba991f2772d04e6c Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 3 Apr 2024 21:47:19 +0200 Subject: [PATCH 22/35] fix: too many branches and returns in the check action --- src/cerbernetix/toolbox/files/file_manager.py | 47 +++++++------------ 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/src/cerbernetix/toolbox/files/file_manager.py b/src/cerbernetix/toolbox/files/file_manager.py index 5241064..dd4e711 100644 --- a/src/cerbernetix/toolbox/files/file_manager.py +++ b/src/cerbernetix/toolbox/files/file_manager.py @@ -25,6 +25,7 @@ print(file.read()) ``` """ + from __future__ import annotations import os @@ -319,6 +320,7 @@ def open( """ self.close() + # pylint: disable-next=consider-using-with self._file = open( self.filename, mode=get_file_mode(create, append, read, write, self.binary), @@ -568,41 +570,26 @@ def check( if must_exist and not 
exist: return False - if min_time is not None or max_time is not None: - if not exist: - return False - - file_time = self.date - - if min_time is not None and file_time <= min_time: - return False - - if max_time is not None and file_time >= max_time: - return False - - if min_age is not None or max_age is not None: - if not exist: - return False - - file_age = self.age + def fail(value: int, value_min: int, value_max: int) -> bool: + if value_min is not None or value_max is not None: + if not exist: + return True - if min_age is not None and file_age <= min_age: - return False + if value_min is not None and value <= value_min: + return True - if max_age is not None and file_age >= max_age: - return False - - if min_size is not None or max_size is not None: - if not exist: - return False + if value_max is not None and value >= value_max: + return True + return False - file_size = self.size + if fail(self.date, min_time, max_time): + return False - if min_size is not None and file_size <= min_size: - return False + if fail(self.age, min_age, max_age): + return False - if max_size is not None and file_size >= max_size: - return False + if fail(self.size, min_size, max_size): + return False return True From dc1704332f9d361a0e6c8f4005d1f913d9454d64 Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 3 Apr 2024 21:48:08 +0200 Subject: [PATCH 23/35] fix: disable invalid name for test case helpers --- src/cerbernetix/toolbox/testing/test_case.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/cerbernetix/toolbox/testing/test_case.py b/src/cerbernetix/toolbox/testing/test_case.py index 0fd7789..3f5c8d9 100644 --- a/src/cerbernetix/toolbox/testing/test_case.py +++ b/src/cerbernetix/toolbox/testing/test_case.py @@ -12,6 +12,7 @@ def test_dict(self): self.assertListsAlmostEqual(create_dict(), {"value": 42.4242, "PI": 3.1415}) ``` """ + import unittest from typing import Iterable @@ -19,9 +20,10 @@ def test_dict(self): class 
TestCase(unittest.TestCase): """Test class with additional assertions.""" + # pylint: disable-next=invalid-name def assertListsAlmostEqual( self, first: Iterable[float], second: Iterable[float], places: int = 7 - ): + ) -> None: """Asserts that 2 lists of float numbers are almost equal by the number of places. Args: @@ -46,7 +48,8 @@ def test_almost_equal(self): second_is_iterable = isinstance(second, Iterable) if not first_is_iterable and not second_is_iterable: - return self.assertAlmostEqual(first, second, places) + self.assertAlmostEqual(first, second, places) + return if not first_is_iterable or not second_is_iterable: raise AssertionError("first != second") @@ -76,9 +79,10 @@ def test_almost_equal(self): self.assertListsAlmostEqual(left, right, places) + # pylint: disable-next=invalid-name def assertListsNotAlmostEqual( self, first: Iterable[float], second: Iterable[float], places: int = 7 - ): + ) -> None: """Asserts that 2 lists of float numbers are not almost equal by the number of places. Args: From 9e9f8ccf8ee2db70b4572248f72be8767071c3b1 Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 3 Apr 2024 21:52:18 +0200 Subject: [PATCH 24/35] doc: fix too long lines --- src/cerbernetix/toolbox/files/file.py | 8 ++++++-- src/cerbernetix/toolbox/files/pickle_file.py | 17 +++++++++-------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/cerbernetix/toolbox/files/file.py b/src/cerbernetix/toolbox/files/file.py index 914d83a..5d064ae 100644 --- a/src/cerbernetix/toolbox/files/file.py +++ b/src/cerbernetix/toolbox/files/file.py @@ -2,7 +2,9 @@ Examples: ```python -from cerbernetix.toolbox.files import fetch_content, get_file_mode, read_file, read_zip_file, write_file +from cerbernetix.toolbox.files import ( + fetch_content, get_file_mode, read_file, read_zip_file, write_file +) # get_file_mode() is used to build a file access mode. 
# For example to create a text file: @@ -33,6 +35,7 @@ content = read_zip_file(data) ``` """ + import os import zipfile from io import BytesIO @@ -222,7 +225,8 @@ def fetch_content( binary (bool): Tells if the content is binary (True) or text (False). When True, the function will return a bytes sequence, otherwise it will return a string sequence. timeout (int | tuple): The request timeout. Defaults to (6, 30). - **kwargs: Additional parameters for the GET request. For more info, see [requests/api](https://requests.readthedocs.io/en/latest/api/). + **kwargs: Additional parameters for the GET request. For more info, see + [requests/api](https://requests.readthedocs.io/en/latest/api/). Raises: requests.RequestException: There was an ambiguous exception that occurred while handling diff --git a/src/cerbernetix/toolbox/files/pickle_file.py b/src/cerbernetix/toolbox/files/pickle_file.py index 50ba8e2..ba5b63c 100644 --- a/src/cerbernetix/toolbox/files/pickle_file.py +++ b/src/cerbernetix/toolbox/files/pickle_file.py @@ -39,6 +39,7 @@ first = file.read() ``` """ + from __future__ import annotations import pickle @@ -134,17 +135,17 @@ def __init__( pickle will try to map the new Python 3 names to the old module names used in Python 2, so that the pickle data stream is readable with Python 2. Defaults to True. encoding (str, optional): Tell pickle how to decode 8-bit string instances pickled by - Python 2. The encoding can be ‘bytes’ to read these 8-bit string instances as bytes objects. - Using encoding='latin1' is required for unpickling NumPy arrays and instances of datetime, - date and time pickled by Python 2. Defaults to ‘ASCII’. + Python 2. The encoding can be ‘bytes’ to read these 8-bit string instances as bytes + objects. Using encoding='latin1' is required for unpickling NumPy arrays and instances + of datetime, date and time pickled by Python 2. Defaults to ‘ASCII’. errors (str, optional): Tell pickle how to decode 8-bit string instances pickled by Python 2. 
Defaults to ‘strict’. buffers (optional): If buffers is None (the default), then all data necessary for - deserialization must be contained in the pickle stream. This means that the buffer_callback - argument was None when a Pickler was instantiated (or when dump() or dumps() was called). If - buffers is not None, it should be an iterable of buffer-enabled objects that is consumed - each time the pickle stream references an out-of-band buffer view. Such buffers have been - given in order to the buffer_callback of a Pickler object. + deserialization must be contained in the pickle stream. This means that the + buffer_callback argument was None when a Pickler was instantiated (or when dump() or + dumps() was called). If buffers is not None, it should be an iterable of buffer-enabled + objects that is consumed each time the pickle stream references an out-of-band buffer + view. Such buffers have been given in order to the buffer_callback of a Pickler object. buffer_callback (optional): If buffer_callback is None (the default), buffer views are serialized into file as part of the pickle stream. If buffer_callback is not None, then it can be called any number of times with a buffer view. If the callback returns a false From 3a835b009d3b3495f9ebd1a5bb960377b9c775f3 Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 3 Apr 2024 21:56:33 +0200 Subject: [PATCH 25/35] doc: update the documentation --- docs/toolbox.files.file.md | 17 ++++++++++------- docs/toolbox.files.file_manager.md | 24 ++++++++++++------------ docs/toolbox.files.pickle_file.md | 16 ++++++++-------- docs/toolbox.logging.log_file.md | 24 ++++++++++++------------ docs/toolbox.testing.test_case.md | 10 +++++----- 5 files changed, 47 insertions(+), 44 deletions(-) diff --git a/docs/toolbox.files.file.md b/docs/toolbox.files.file.md index 81c7cc5..c73856c 100644 --- a/docs/toolbox.files.file.md +++ b/docs/toolbox.files.file.md @@ -9,7 +9,9 @@ A collection of utilities for accessing files. 
**Examples:** ```python -from cerbernetix.toolbox.files import fetch_content, get_file_mode, read_file, read_zip_file, write_file +from cerbernetix.toolbox.files import ( + fetch_content, get_file_mode, read_file, read_zip_file, write_file +) # get_file_mode() is used to build a file access mode. # For example to create a text file: @@ -43,7 +45,7 @@ content = read_zip_file(data) --- - + ## function `get_file_mode` @@ -107,7 +109,7 @@ with open('path/to/file', get_file_mode(binary=True)) as file: --- - + ## function `read_file` @@ -159,7 +161,7 @@ data = read_file('path/to/file', binary=True) --- - + ## function `write_file` @@ -214,7 +216,7 @@ write_file('path/to/file', data, binary=True) --- - + ## function `fetch_content` @@ -240,7 +242,8 @@ Under the hood, it relies on requests to process the query. - `url` (str): The URL of the content to fetch. - `binary` (bool): Tells if the content is binary (True) or text (False). When True, the function will return a bytes sequence, otherwise it will return a string sequence. - `timeout` (int | tuple): The request timeout. Defaults to (6, 30). - - `**kwargs`: Additional parameters for the GET request. For more info, see [requests/api](https://requests.readthedocs.io/en/latest/api/). + - `**kwargs`: Additional parameters for the GET request. For more info, see + [requests/api](https://requests.readthedocs.io/en/latest/api/). @@ -277,7 +280,7 @@ data = fetch_content("http://example.com/data", binary=True) --- - + ## function `read_zip_file` diff --git a/docs/toolbox.files.file_manager.md b/docs/toolbox.files.file_manager.md index 15bbd3f..3af0f86 100644 --- a/docs/toolbox.files.file_manager.md +++ b/docs/toolbox.files.file_manager.md @@ -36,7 +36,7 @@ with file: --- - + ## class `FileManager` Offers a simple API for reading and writing files. 
@@ -74,7 +74,7 @@ with file(create=True): content = file.read_file() ``` - + ### method `__init__` @@ -293,7 +293,7 @@ size = file.size --- - + ### method `check` @@ -355,7 +355,7 @@ else: --- - + ### method `close` @@ -393,7 +393,7 @@ file.close() --- - + ### method `create_path` @@ -428,7 +428,7 @@ else: --- - + ### method `delete` @@ -467,7 +467,7 @@ file.delete() --- - + ### method `exists` @@ -499,7 +499,7 @@ if file.exists(): --- - + ### method `open` @@ -560,7 +560,7 @@ data = [dat for dat in file] --- - + ### method `read` @@ -601,7 +601,7 @@ with file: --- - + ### method `read_file` @@ -650,7 +650,7 @@ for data in file.read_file(iterator=True): --- - + ### method `write` @@ -696,7 +696,7 @@ with file(create=True): --- - + ### method `write_file` diff --git a/docs/toolbox.files.pickle_file.md b/docs/toolbox.files.pickle_file.md index 1cb4dc9..e8888eb 100644 --- a/docs/toolbox.files.pickle_file.md +++ b/docs/toolbox.files.pickle_file.md @@ -54,7 +54,7 @@ with file: --- - + ## function `read_pickle_file` @@ -106,7 +106,7 @@ for obj in read_pickle_file('path/to/file', iterator=True): --- - + ## function `write_pickle_file` @@ -156,7 +156,7 @@ write_pickle_file('path/to/file', data) --- - + ## class `PickleFile` Offers a simple API for reading and writing pickle files. 
@@ -200,7 +200,7 @@ file.write_file(data) data = file.read_file() ``` - + ### method `__init__` @@ -439,7 +439,7 @@ size = file.size --- - + ### method `read` @@ -483,7 +483,7 @@ data = [obj for obj in file] --- - + ### method `read_file` @@ -534,7 +534,7 @@ for obj in file.read_file(iterator=True): --- - + ### method `write` @@ -581,7 +581,7 @@ with file(create=True): --- - + ### method `write_file` diff --git a/docs/toolbox.logging.log_file.md b/docs/toolbox.logging.log_file.md index 48e1bc5..e0b51a1 100644 --- a/docs/toolbox.logging.log_file.md +++ b/docs/toolbox.logging.log_file.md @@ -40,7 +40,7 @@ logger.error('An error occurred: %s', error) --- - + ## class `LogFile` Offers a similar API to the Python builtin loggers for logging to a custom file. @@ -68,7 +68,7 @@ logger = LogFile() logger.info('The received value is %d', value) ``` - + ### method `__init__` @@ -233,7 +233,7 @@ print(logger.name) --- - + ### method `close` @@ -266,7 +266,7 @@ logger.close() # not necessary since it will be called automatically upon exit. 
--- - + ### method `debug` @@ -306,7 +306,7 @@ logger.debug('For debug purpose: %d given to %s', value, action) --- - + ### method `delete` @@ -335,7 +335,7 @@ logger.delete() --- - + ### method `error` @@ -375,7 +375,7 @@ logger.error('An error occurred: %s', error) --- - + ### method `info` @@ -415,7 +415,7 @@ logger.info('The received value is %d', value) --- - + ### method `log` @@ -459,7 +459,7 @@ logger.log(logging.ERROR, 'An error occurred: %s', error) --- - + ### method `open` @@ -492,7 +492,7 @@ logger.info('Something was done at %s', datetime.now()) --- - + ### method `set_format` @@ -527,7 +527,7 @@ logger.set_format('[%(asctime)s][%(levelname)s]: %(message)s') --- - + ### method `set_level` @@ -562,7 +562,7 @@ logger.set_level(logging.DEBUG) --- - + ### method `warn` diff --git a/docs/toolbox.testing.test_case.md b/docs/toolbox.testing.test_case.md index 0892e5d..b903f88 100644 --- a/docs/toolbox.testing.test_case.md +++ b/docs/toolbox.testing.test_case.md @@ -23,7 +23,7 @@ class TestMyStuff(testing.TestCase) --- - + ## class `TestCase` Test class with additional assertions. @@ -33,7 +33,7 @@ Test class with additional assertions. --- - + ### method `assertListsAlmostEqual` @@ -42,7 +42,7 @@ assertListsAlmostEqual( first: Iterable[float], second: Iterable[float], places: int = 7 -) +) → None ``` Asserts that 2 lists of float numbers are almost equal by the number of places. @@ -74,7 +74,7 @@ class TestMyStuff(testing.TestCase) --- - + ### method `assertListsNotAlmostEqual` @@ -83,7 +83,7 @@ assertListsNotAlmostEqual( first: Iterable[float], second: Iterable[float], places: int = 7 -) +) → None ``` Asserts that 2 lists of float numbers are not almost equal by the number of places. 
From c7e0c5addaf844a2537c430ff7bf92c7961fb9fa Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 3 Apr 2024 21:58:08 +0200 Subject: [PATCH 26/35] chore: update the changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e8b12ca..090096e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Fix the link to the documentation in the readme. +- Inconsistent return value in the log action. +- Too many branches and returns in the file checker. +- A few linter issues. ## [0.9.1] - 2023-10-27 From fa851b29f1ea9b9f24433d915afe8454db9cf503 Mon Sep 17 00:00:00 2001 From: jsconan Date: Mon, 8 Apr 2024 22:52:18 +0200 Subject: [PATCH 27/35] test: remove unused package import --- tests/math/test_combination.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/math/test_combination.py b/tests/math/test_combination.py index 08a0c2b..41316b8 100644 --- a/tests/math/test_combination.py +++ b/tests/math/test_combination.py @@ -1,7 +1,6 @@ """Test the set of functions for working with combinations.""" import unittest -from operator import ne from typing import Iterator from cerbernetix.toolbox.math import ( From 312cc8fad4f275d489f7b360ad0a113346d04815 Mon Sep 17 00:00:00 2001 From: jsconan Date: Mon, 8 Apr 2024 22:52:47 +0200 Subject: [PATCH 28/35] doc: add an example for get_combinations --- src/cerbernetix/toolbox/math/combination.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/cerbernetix/toolbox/math/combination.py b/src/cerbernetix/toolbox/math/combination.py index 5797a95..3baaa2e 100644 --- a/src/cerbernetix/toolbox/math/combination.py +++ b/src/cerbernetix/toolbox/math/combination.py @@ -194,6 +194,19 @@ def get_combinations( # [1, 4, 16], # [2, 4, 16], # [1, 8, 16]] + + # Get combinations from a number of values + print(list(get_combinations(5, 3, offset=1))) + # [[1, 2, 3], + # [1, 2, 4], + 
# [1, 3, 4], + # [2, 3, 4], + # [1, 2, 5], + # [1, 3, 5], + # [2, 3, 5], + # [1, 4, 5], + # [2, 4, 5], + # [3, 4, 5]] ``` """ if isinstance(values, int): From 115b5af03944a6364b4d9d0eec380650a1087984 Mon Sep 17 00:00:00 2001 From: jsconan Date: Mon, 8 Apr 2024 22:57:42 +0200 Subject: [PATCH 29/35] feat: add a math helper to get both the min and the max values --- src/cerbernetix/toolbox/math/__init__.py | 2 ++ src/cerbernetix/toolbox/math/utils.py | 28 ++++++++++++++++++++++++ tests/math/test_utils.py | 20 +++++++++++++++++ 3 files changed, 50 insertions(+) create mode 100644 src/cerbernetix/toolbox/math/utils.py create mode 100644 tests/math/test_utils.py diff --git a/src/cerbernetix/toolbox/math/__init__.py b/src/cerbernetix/toolbox/math/__init__.py index 793c3cc..f7302cf 100644 --- a/src/cerbernetix/toolbox/math/__init__.py +++ b/src/cerbernetix/toolbox/math/__init__.py @@ -6,6 +6,7 @@ particular rank. - `get_combinations(values, length, start, stop, step, offset, indexes)`: Yields lists of combined values according to the combinations defined by the lengths. +- `minmax(*args)`: Returns with the min and the max value from the given arguments. Examples: ```python @@ -36,3 +37,4 @@ get_combination_rank, get_combinations, ) +from cerbernetix.toolbox.math.utils import minmax diff --git a/src/cerbernetix/toolbox/math/utils.py b/src/cerbernetix/toolbox/math/utils.py new file mode 100644 index 0000000..cf52000 --- /dev/null +++ b/src/cerbernetix/toolbox/math/utils.py @@ -0,0 +1,28 @@ +"""A set of helper functions related to math. + +Examples +```python +from cerbernetix.toolbox.math import minmax + +mini, maxi = minmax(3, 2, 6, 4, 5) # 2, 6 +``` +""" + + +def minmax(*args) -> tuple: + """Returns with the min and the max value from the arguments. + + Args: + *args: Arguments from which extract the min and the max. + + Returns: + tuple: A tuple with first the min value, then the max value. 
+ + Examples + ```python + from cerbernetix.toolbox.math import minmax + + mini, maxi = minmax(3, 2, 6, 4, 5) # 2, 6 + ``` + """ + return min(*args), max(*args) diff --git a/tests/math/test_utils.py b/tests/math/test_utils.py new file mode 100644 index 0000000..0f44970 --- /dev/null +++ b/tests/math/test_utils.py @@ -0,0 +1,20 @@ +"""Test the set of helper functions related to math.""" + +import unittest + +from cerbernetix.toolbox.math import minmax + + +class TestUtils(unittest.TestCase): + """Test suite for the set of helper functions related to math.""" + + def test_minmax(self): + """Test minmax.""" + self.assertEqual(minmax([1]), (1, 1)) + self.assertEqual(minmax([1, 2]), (1, 2)) + self.assertEqual(minmax([2, 1]), (1, 2)) + self.assertEqual(minmax(*[1, 2]), (1, 2)) + self.assertEqual(minmax(*[2, 1]), (1, 2)) + self.assertEqual(minmax(1, 2), (1, 2)) + self.assertEqual(minmax(2, 1), (1, 2)) + self.assertEqual(minmax(3, 2, 6, 5, 4), (2, 6)) From 93d5ec36e3e8c85331cc4b19e3b755f045adbe04 Mon Sep 17 00:00:00 2001 From: jsconan Date: Mon, 8 Apr 2024 23:12:00 +0200 Subject: [PATCH 30/35] feat: add a math helper to get a quantity with respect to a quota applied to a total --- src/cerbernetix/toolbox/math/__init__.py | 19 ++++++++++- src/cerbernetix/toolbox/math/utils.py | 40 ++++++++++++++++++++++++ tests/math/test_utils.py | 10 +++++- 3 files changed, 67 insertions(+), 2 deletions(-) diff --git a/src/cerbernetix/toolbox/math/__init__.py b/src/cerbernetix/toolbox/math/__init__.py index f7302cf..47e0285 100644 --- a/src/cerbernetix/toolbox/math/__init__.py +++ b/src/cerbernetix/toolbox/math/__init__.py @@ -7,6 +7,7 @@ - `get_combinations(values, length, start, stop, step, offset, indexes)`: Yields lists of combined values according to the combinations defined by the lengths. - `minmax(*args)`: Returns with the min and the max value from the given arguments. +- `quantity(quota, total)`: Gets a quantity with respect to a quota applied to a total. 
Examples: ```python @@ -30,6 +31,22 @@ values = [1, 2, 4, 8, 16] print(list(get_combinations(50, 3, start=200, stop=300))) ``` + +```python +from cerbernetix.toolbox.math import minmax + +mini, maxi = minmax(3, 2, 6, 4, 5) # 2, 6 +``` + +```python +from cerbernetix.toolbox.math import quantity + +# Gets a size from a percentage +size = quantity(.2, 10) # 2 + +# Gets a size from an absolute value +size = quantity(6, 10) # 6 +``` """ from cerbernetix.toolbox.math.combination import ( @@ -37,4 +54,4 @@ get_combination_rank, get_combinations, ) -from cerbernetix.toolbox.math.utils import minmax +from cerbernetix.toolbox.math.utils import minmax, quantity diff --git a/src/cerbernetix/toolbox/math/utils.py b/src/cerbernetix/toolbox/math/utils.py index cf52000..cefd185 100644 --- a/src/cerbernetix/toolbox/math/utils.py +++ b/src/cerbernetix/toolbox/math/utils.py @@ -6,6 +6,16 @@ mini, maxi = minmax(3, 2, 6, 4, 5) # 2, 6 ``` + +```python +from cerbernetix.toolbox.math import quantity + +# Gets a size from a percentage +size = quantity(.2, 10) # 2 + +# Gets a size from an absolute value +size = quantity(6, 10) # 6 +``` """ @@ -26,3 +36,33 @@ def minmax(*args) -> tuple: ``` """ return min(*args), max(*args) + + +def quantity(quota: int | float, total: int) -> int: + """Gets a quantity with respect to a quota applied to a total. + + Args: + quota (int | float): The expected quota from the total. It can be either a percentage or an + absolute value. The percentage is represented by a number between 0 and 1. An absolute + value is represented by a number between 1 and the total included. + total (int): The total number. + + Returns: + int: The quantity computed from the quota applied to the total. It cannot exceed the total, + and it cannot be negative. 
+ + Examples + ```python + from cerbernetix.toolbox.math import quantity + + # Gets a size from a percentage + size = quantity(.2, 10) # 2 + + # Gets a size from an absolute value + size = quantity(6, 10) # 6 + ``` + """ + if 0 < quota < 1: + return int(total * quota) + + return min(abs(int(quota)), total) diff --git a/tests/math/test_utils.py b/tests/math/test_utils.py index 0f44970..3b73efb 100644 --- a/tests/math/test_utils.py +++ b/tests/math/test_utils.py @@ -2,7 +2,7 @@ import unittest -from cerbernetix.toolbox.math import minmax +from cerbernetix.toolbox.math import minmax, quantity class TestUtils(unittest.TestCase): @@ -18,3 +18,11 @@ def test_minmax(self): self.assertEqual(minmax(1, 2), (1, 2)) self.assertEqual(minmax(2, 1), (1, 2)) self.assertEqual(minmax(3, 2, 6, 5, 4), (2, 6)) + + def test_quantity(self): + """Test quantity.""" + self.assertEqual(quantity(5, 10), 5) + self.assertEqual(quantity(0.1, 10), 1) + self.assertEqual(quantity(1.5, 10), 1) + self.assertEqual(quantity(-0.2, 10), 0) + self.assertEqual(quantity(30, 10), 10) From d087de3ec9a867045a1f4a2a74878be376be837e Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 1 May 2024 11:36:06 +0200 Subject: [PATCH 31/35] chore: update the changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 090096e..0046117 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - `get_combinations(values, length, start, stop, step, offset, indexes)`: Computes combinations given a set of values and a length. +- `minmax(*args)`: Returns with the min and the max value from the given arguments. +- `quantity(quota, total)`: Gets a quantity with respect to a quota applied to a total. 
### Changed From 5b60c0325e6958274a88dd023d9496b92efa8cd2 Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 1 May 2024 11:36:39 +0200 Subject: [PATCH 32/35] doc: update the documentation --- docs/README.md | 3 + docs/toolbox.math.combination.md | 13 +++++ docs/toolbox.math.md | 18 ++++++ docs/toolbox.math.utils.md | 96 ++++++++++++++++++++++++++++++++ 4 files changed, 130 insertions(+) create mode 100644 docs/toolbox.math.utils.md diff --git a/docs/README.md b/docs/README.md index 274602a..a78b1a9 100644 --- a/docs/README.md +++ b/docs/README.md @@ -23,6 +23,7 @@ - [`toolbox.logging.log_file`](./toolbox.logging.log_file.md#module-toolboxlogginglog_file): A custom logger that writes directly to a file. - [`toolbox.math`](./toolbox.math.md#module-toolboxmath): A collection of Math related tools. - [`toolbox.math.combination`](./toolbox.math.combination.md#module-toolboxmathcombination): A set of functions for working with combinations. +- [`toolbox.math.utils`](./toolbox.math.utils.md#module-toolboxmathutils): A set of helper functions related to math. - [`toolbox.testing`](./toolbox.testing.md#module-toolboxtesting): The `testing` package provides utilities for testing purpose. - [`toolbox.testing.decorators`](./toolbox.testing.decorators.md#module-toolboxtestingdecorators): A collection of decorators for testing purpose. - [`toolbox.testing.test_case`](./toolbox.testing.test_case.md#module-toolboxtestingtest_case): Extends the default Python TestCase with more assertions. @@ -74,6 +75,8 @@ - [`combination.get_combination_from_rank`](./toolbox.math.combination.md#function-get_combination_from_rank): Gets the combination corresponding to a particular rank. - [`combination.get_combination_rank`](./toolbox.math.combination.md#function-get_combination_rank): Gets the rank of a combination. - [`combination.get_combinations`](./toolbox.math.combination.md#function-get_combinations): Yields lists of combined values according to the combinations defined by the lengths. 
+- [`utils.minmax`](./toolbox.math.utils.md#function-minmax): Returns with the min and the max value from the arguments. +- [`utils.quantity`](./toolbox.math.utils.md#function-quantity): Gets a quantity with respect to a quota applied to a total. - [`decorators.test_cases`](./toolbox.testing.decorators.md#function-test_cases): Creates a decorator for parametric test cases. diff --git a/docs/toolbox.math.combination.md b/docs/toolbox.math.combination.md index 9e15c2f..cf9e851 100644 --- a/docs/toolbox.math.combination.md +++ b/docs/toolbox.math.combination.md @@ -197,6 +197,19 @@ print(list(get_combinations(values, 3, indexes=indexes, start=4, stop=8))) # [1, 4, 16], # [2, 4, 16], # [1, 8, 16]] + +# Get combinations from a number of values +print(list(get_combinations(5, 3, offset=1))) +# [[1, 2, 3], +# [1, 2, 4], +# [1, 3, 4], +# [2, 3, 4], +# [1, 2, 5], +# [1, 3, 5], +# [2, 3, 5], +# [1, 4, 5], +# [2, 4, 5], +# [3, 4, 5]] ``` diff --git a/docs/toolbox.math.md b/docs/toolbox.math.md index 14e2b9f..0f6dae9 100644 --- a/docs/toolbox.math.md +++ b/docs/toolbox.math.md @@ -9,6 +9,8 @@ It contains: - `get_combination_rank(combination, offset)`: Gets the rank of a combination. - `get_combination_from_rank(rank, length, offset)`: Gets the combination corresponding to a particular rank. - `get_combinations(values, length, start, stop, step, offset, indexes)`: Yields lists of combined values according to the combinations defined by the lengths. +- `minmax(*args)`: Returns with the min and the max value from the given arguments. +- `quantity(quota, total)`: Gets a quantity with respect to a quota applied to a total. 
@@ -35,6 +37,22 @@ values = [1, 2, 4, 8, 16] print(list(get_combinations(50, 3, start=200, stop=300))) ``` +```python +from cerbernetix.toolbox.math import minmax + +mini, maxi = minmax(3, 2, 6, 4, 5) # 2, 6 +``` + +```python +from cerbernetix.toolbox.math import quantity + +# Gets a size from a percentage +size = quantity(.2, 10) # 2 + +# Gets a size from an absolute value +size = quantity(6, 10) # 6 +``` + diff --git a/docs/toolbox.math.utils.md b/docs/toolbox.math.utils.md new file mode 100644 index 0000000..adf0bf9 --- /dev/null +++ b/docs/toolbox.math.utils.md @@ -0,0 +1,96 @@ + + + + +# module `toolbox.math.utils` +A set of helper functions related to math. + +Examples ```python +from cerbernetix.toolbox.math import minmax + +mini, maxi = minmax(3, 2, 6, 4, 5) # 2, 6 +``` + +```python +from cerbernetix.toolbox.math import quantity + +# Gets a size from a percentage +size = quantity(.2, 10) # 2 + +# Gets a size from an absolute value +size = quantity(6, 10) # 6 +``` + + +--- + + + +## function `minmax` + +```python +minmax(*args) → tuple +``` + +Returns with the min and the max value from the arguments. + + + +**Args:** + + - `*args`: Arguments from which extract the min and the max. + + + +**Returns:** + + - `tuple`: A tuple with first the min value, then the max value. + +Examples ```python +from cerbernetix.toolbox.math import minmax + +mini, maxi = minmax(3, 2, 6, 4, 5) # 2, 6 +``` + + +--- + + + +## function `quantity` + +```python +quantity(quota: int | float, total: int) → int +``` + +Gets a quantity with respect to a quota applied to a total. + + + +**Args:** + + - `quota` (int | float): The expected quota from the total. It can be either a percentage or an absolute value. The percentage is represented by a number between 0 and 1. An absolute value is represented by a number between 1 and the total included. + - `total` (int): The total number. + + + +**Returns:** + + - `int`: The quantity computed from the quota applied to the total. 
It cannot exceed the total, and it cannot be negative. + +Examples ```python +from cerbernetix.toolbox.math import quantity + +# Gets a size from a percentage +size = quantity(.2, 10) # 2 + +# Gets a size from an absolute value +size = quantity(6, 10) # 6 +``` + + + + +--- + +_This file was automatically generated via [lazydocs](https://github.com/ml-tooling/lazydocs)._ From 91bad2b295e02316561361a2f606589baae94b5e Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 1 May 2024 11:54:57 +0200 Subject: [PATCH 33/35] doc: typo in the instructions for the installation in dev mode --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 67bf416..328d2ad 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ Then, create the virtual env and install the dependencies: ```sh cd py-toolbox python3 -m venv ".venv" -source "./venv/bin/activate" +source ".venv/bin/activate" pip install -r requirements.txt pip install -e . ``` From 7c99fa4d76d934e68649c4c8e259fb926e72824e Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 1 May 2024 11:56:02 +0200 Subject: [PATCH 34/35] chore: update the changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0046117..fc4ad63 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Inconsistent return value in the log action. - Too many branches and returns in the file checker. - A few linter issues. +- Typo in the instructions for the installation in dev mode. 
## [0.9.1] - 2023-10-27 From 7b54fe0aecf962a977b691a90c5de39f485202fb Mon Sep 17 00:00:00 2001 From: jsconan Date: Wed, 1 May 2024 12:00:39 +0200 Subject: [PATCH 35/35] chore: bump version --- CHANGELOG.md | 2 ++ pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc4ad63..2b70590 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.10.0] - 2024-05-01 + ### Added - `get_combinations(values, length, start, stop, step, offset, indexes)`: Computes combinations given a set of values and a length. diff --git a/pyproject.toml b/pyproject.toml index 7d157fc..4d988bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "cerbernetix.toolbox" -version = "0.9.1" +version = "0.10.0" authors = [{ name = "Jean-Sébastien CONAN", email = "jsconan@gmail.com" }] description = "A set of utilities for Python projects" readme = "README.md"