From 5f176b1f4b8833eb5c5cd2bde60d5100a6bdde49 Mon Sep 17 00:00:00 2001 From: Samuel Marks <807580+SamuelMarks@users.noreply.github.com> Date: Wed, 12 Apr 2023 20:36:19 -0400 Subject: [PATCH 1/3] [keras/datasets/imdb.py,keras/datasets/reuters.py] Standardise docstring usage of "Default to" --- keras/datasets/imdb.py | 12 ++++++------ keras/datasets/reuters.py | 16 ++++++++-------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/keras/datasets/imdb.py b/keras/datasets/imdb.py index ad0f1dca70e..1e61771ad79 100644 --- a/keras/datasets/imdb.py +++ b/keras/datasets/imdb.py @@ -58,17 +58,17 @@ def load_data( ranked by how often they occur (in the training set) and only the `num_words` most frequent words are kept. Any less frequent word will appear as `oov_char` value in the sequence data. If None, - all words are kept. Defaults to None, so all words are kept. + all words are kept. Defaults to `None`. skip_top: skip the top N most frequently occurring words (which may not be informative). These words will appear as - `oov_char` value in the dataset. Defaults to 0, so no words are - skipped. + `oov_char` value in the dataset. When 0, no words are + skipped. Defaults to `0`. maxlen: int or None. Maximum sequence length. - Any longer sequence will be truncated. Defaults to None, which - means no truncation. + Any longer sequence will be truncated. None, means no truncation. + Defaults to `None`. seed: int. Seed for reproducible data shuffling. start_char: int. The start of a sequence will be marked with this - character. Defaults to 1 because 0 is usually the padding character. + character. 0 is usually the padding character. Defaults to `1`. oov_char: int. The out-of-vocabulary character. Words that were cut out because of the `num_words` or `skip_top` limits will be replaced with this character. 
diff --git a/keras/datasets/reuters.py b/keras/datasets/reuters.py index fbc431c068c..19b27949d84 100644 --- a/keras/datasets/reuters.py +++ b/keras/datasets/reuters.py @@ -65,20 +65,20 @@ def load_data( ranked by how often they occur (in the training set) and only the `num_words` most frequent words are kept. Any less frequent word will appear as `oov_char` value in the sequence data. If None, - all words are kept. Defaults to None, so all words are kept. + all words are kept. Defaults to `None`. skip_top: skip the top N most frequently occurring words (which may not be informative). These words will appear as - `oov_char` value in the dataset. Defaults to 0, so no words are - skipped. + `oov_char` value in the dataset. 0 means no words are + skipped. Defaults to 0 maxlen: int or None. Maximum sequence length. - Any longer sequence will be truncated. Defaults to None, which - means no truncation. + Any longer sequence will be truncated. None means no truncation. + Defaults to `None`. test_split: Float between 0 and 1. Fraction of the dataset to be used - as test data. Defaults to 0.2, meaning 20% of the dataset is used as - test data. + as test data. 0.2 means that 20% of the dataset is used as + test data. Defaults to 0.2 seed: int. Seed for reproducible data shuffling. start_char: int. The start of a sequence will be marked with this - character. Defaults to 1 because 0 is usually the padding character. + character. 0 is usually the padding character. Defaults to `1`. oov_char: int. The out-of-vocabulary character. Words that were cut out because of the `num_words` or `skip_top` limits will be replaced with this character. 
From 6d271fd7914cfc92cdcc267aa6e1ee42fce7c3f2 Mon Sep 17 00:00:00 2001 From: Samuel Marks <807580+SamuelMarks@users.noreply.github.com> Date: Wed, 19 Apr 2023 22:41:02 -0400 Subject: [PATCH 2/3] [keras/datasets/reuters.py] Use backticks for defaults in docstrings --- keras/datasets/imdb.py | 2 +- keras/datasets/reuters.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/keras/datasets/imdb.py b/keras/datasets/imdb.py index 1e61771ad79..30dc4f80946 100644 --- a/keras/datasets/imdb.py +++ b/keras/datasets/imdb.py @@ -64,7 +64,7 @@ def load_data( `oov_char` value in the dataset. When 0, no words are skipped. Defaults to `0`. maxlen: int or None. Maximum sequence length. - Any longer sequence will be truncated. None, means no truncation. + Any longer sequence will be truncated. `None` means no truncation. Defaults to `None`. seed: int. Seed for reproducible data shuffling. start_char: int. The start of a sequence will be marked with this diff --git a/keras/datasets/reuters.py b/keras/datasets/reuters.py index 19b27949d84..6bc8f8cd34f 100644 --- a/keras/datasets/reuters.py +++ b/keras/datasets/reuters.py @@ -68,14 +68,14 @@ def load_data( all words are kept. Defaults to `None`. skip_top: skip the top N most frequently occurring words (which may not be informative). These words will appear as - `oov_char` value in the dataset. 0 means no words are - skipped. Defaults to 0 + `oov_char` value in the dataset. `0` means no words are + skipped. Defaults to `0`. maxlen: int or None. Maximum sequence length. - Any longer sequence will be truncated. None means no truncation. + Any longer sequence will be truncated. `None` means no truncation. Defaults to `None`. test_split: Float between 0 and 1. Fraction of the dataset to be used as test data. 0.2 means that 20% of the dataset is used as - test data. Defaults to 0.2 + test data. Defaults to `0.2`. seed: int. Seed for reproducible data shuffling. start_char: int. 
The start of a sequence will be marked with this character. 0 is usually the padding character. Defaults to `1`. From 7ff7cccb08f3e35fe3c8b730a288e0522c005ef6 Mon Sep 17 00:00:00 2001 From: Samuel Marks <807580+SamuelMarks@users.noreply.github.com> Date: Mon, 24 Apr 2023 23:31:46 -0400 Subject: [PATCH 3/3] [keras/datasets/reuters.py] Resolve E501 --- keras/datasets/reuters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/keras/datasets/reuters.py b/keras/datasets/reuters.py index f64d1dcebf6..38cc15e33d9 100644 --- a/keras/datasets/reuters.py +++ b/keras/datasets/reuters.py @@ -73,8 +73,8 @@ def load_data( maxlen: int or None. Maximum sequence length. Any longer sequence will be truncated. None means no truncation. Defaults to `None`. - test_split: Float between `0.` and `1.`. Fraction of the dataset to be used - as test data. 0.2 means that 20% of the dataset is used as + test_split: Float between `0.` and `1.`. Fraction of the dataset to be + used as test data. `0.2` means that 20% of the dataset is used as test data. Defaults to `0.2`. seed: int. Seed for reproducible data shuffling. start_char: int. The start of a sequence will be marked with this