diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml new file mode 100644 index 000000000..ee6b16a11 --- /dev/null +++ b/.github/workflows/ruff.yml @@ -0,0 +1,31 @@ +name: "Ruff" + +on: + push: + branches: [ master ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ master ] + schedule: + - cron: '33 1 * * 3' + +jobs: + ruff: + name: "See: docs.astral.sh/ruff" + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: cd and ls + run: | + cd ${{ github.workspace }} + ls -l + - name: Download ruff + run: | + wget -c https://github.com/astral-sh/ruff/releases/download/v0.0.290/ruff-x86_64-unknown-linux-gnu.tar.gz + tar -xzf ruff-x86_64-unknown-linux-gnu.tar.gz + ls -l ruff + chmod a+x ruff + - name: Run ruff + run: ./ruff ./pyglossary \ No newline at end of file diff --git a/pyglossary/glossary_v2.py b/pyglossary/glossary_v2.py index 956f29d58..ca1d22c5e 100644 --- a/pyglossary/glossary_v2.py +++ b/pyglossary/glossary_v2.py @@ -120,11 +120,14 @@ class GlossaryCommon(GlossaryInfo, GlossaryProgress, PluginManager): These methods do not exist in glossary_v2.py (but still exist in glossary.py) - - read(): you can use directRead() then iterate over glossary + - read(): + you can use directRead() then iterate over glossary - - sortWords(): you have to sort entries yourself (when adding or after directRead) + - sortWords(): + you have to sort entries yourself (when adding or after directRead) - - updateIter(): no longer needed, and does't do anything in glossary.py + - updateIter(): + no longer needed, and doesn't do anything in glossary.py """ diff --git a/pyglossary/plugin_lib/readmdict.py b/pyglossary/plugin_lib/readmdict.py index c5ee8c5ed..90d335152 100644 --- a/pyglossary/plugin_lib/readmdict.py +++ b/pyglossary/plugin_lib/readmdict.py @@ -285,7 +285,9 @@ def _decode_key_block_info(self, key_block_info_compressed): key_block_info[i:i+self._number_width], )[0] i += 
self._number_width - key_block_info_list += [(key_block_compressed_size, key_block_decompressed_size)] + key_block_info_list.append( + (key_block_compressed_size, key_block_decompressed_size), + ) # assert num_entries == self._num_entries @@ -363,7 +365,8 @@ def _read_header(self): # encryption flag # 0x00 - no encryption, "Allow export to text" is checked in MdxBuilder 3. # 0x01 - encrypt record block, "Encryption Key" is given in MdxBuilder 3. - # 0x02 - encrypt key info block, "Allow export to text" is unchecked in MdxBuilder 3. + # 0x02 - encrypt key info block, + # "Allow export to text" is unchecked in MdxBuilder 3. if b'Encrypted' not in header_tag or header_tag[b'Encrypted'] == b'No': self._encrypt = 0 elif header_tag[b'Encrypted'] == b'Yes': @@ -485,7 +488,8 @@ def _read_keys_v1v2(self): adler32 = unpack('>I', f.read(4))[0] assert adler32 == (zlib.adler32(block) & 0xffffffff) - # read key block info, which indicates key block's compressed and decompressed size + # read key block info, which indicates key block's compressed + # and decompressed size key_block_info = f.read(key_block_info_size) key_block_info_list = self._decode_key_block_info(key_block_info) assert num_key_blocks == len(key_block_info_list) @@ -572,7 +576,10 @@ def _read_records_v3(self): for _ in range(num_record_blocks): decompressed_size = self._read_int32(f) compressed_size = self._read_int32(f) - record_block = self._decode_block(f.read(compressed_size), decompressed_size) + record_block = self._decode_block( + f.read(compressed_size), + decompressed_size, + ) # split record block according to the offset info from key block while i < len(self._key_list): @@ -618,7 +625,10 @@ def _read_records_v1v2(self): for compressed_size, decompressed_size in record_block_info_list: record_block_compressed = f.read(compressed_size) try: - record_block = self._decode_block(record_block_compressed, decompressed_size) + record_block = self._decode_block( + record_block_compressed, + decompressed_size, 
+ ) except zlib.error: log.error("zlib decompress error") log.debug(f"record_block_compressed = {record_block_compressed!r}") diff --git a/pyglossary/plugin_manager.py b/pyglossary/plugin_manager.py index fb223de53..7e80d3f09 100644 --- a/pyglossary/plugin_manager.py +++ b/pyglossary/plugin_manager.py @@ -291,7 +291,9 @@ def error(msg: str) -> None: if not inputFilename: return error(f"Invalid filename {filename!r}") # type: ignore if not plugin: - return error("No filename nor format is given for output file") # type: ignore + return error( + "No filename nor format is given for output file", + ) # type: ignore filename = splitext(inputFilename)[0] + plugin.ext return DetectedFormat(filename, plugin.name, "") @@ -307,7 +309,9 @@ def error(msg: str) -> None: return error("Unable to detect output format!") # type: ignore if not plugin.canWrite: - return error(f"plugin {plugin.name} does not support writing") # type: ignore + return error( + f"plugin {plugin.name} does not support writing", + ) # type: ignore if compression in getattr(plugin.writerClass, "compressions", []): compression = "" diff --git a/pyglossary/plugin_prop.py b/pyglossary/plugin_prop.py index 51529043e..e3a2014c8 100644 --- a/pyglossary/plugin_prop.py +++ b/pyglossary/plugin_prop.py @@ -325,7 +325,10 @@ def getOptionsFromClass(self, rwclass: "type") -> "dict[str, Any]": continue prop = optionsProp[name] if prop.disabled: - core.trace(log, f"skipping disabled option {name} in {self.name} plugin") + core.trace( + log, + f"skipping disabled option {name} in {self.name} plugin", + ) continue if not prop.validate(default): log.warning( diff --git a/pyglossary/plugins/appledict_bin/__init__.py b/pyglossary/plugins/appledict_bin/__init__.py index db6dfd42c..03e7b6a00 100644 --- a/pyglossary/plugins/appledict_bin/__init__.py +++ b/pyglossary/plugins/appledict_bin/__init__.py @@ -117,7 +117,8 @@ def __init__(self, glos: GlossaryType) -> None: def tostring( self, - elem: "Element | HtmlComment | 
HtmlElement | HtmlEntity | HtmlProcessingInstruction", + elem: "Element | HtmlComment | HtmlElement" + " | HtmlEntity | HtmlProcessingInstruction", ) -> str: from lxml.html import tostring as tostring @@ -461,7 +462,9 @@ def readEntryIds(self) -> None: continue title_j = entryBytes.find(b'"', title_i + 9) if title_j < 0: - log.error(f"title closing not found: {entryBytes.decode(self._encoding)}") + log.error( + f"title closing not found: {entryBytes.decode(self._encoding)}", + ) continue titleById[_id] = entryBytes[title_i + 9: title_j].decode(self._encoding) @@ -493,7 +496,9 @@ def setKeyTextData( decompressedSectionByteLen = readInt(keyTextFile) if compressedSectionByteLen == decompressedSectionByteLen == 0: break - chunk_section_compressed = keyTextFile.read(compressedSectionByteLen - 4) + chunk_section_compressed = keyTextFile.read( + compressedSectionByteLen - 4, + ) chunksection_bytes = decompress(chunk_section_compressed ) buff.write(chunksection_bytes) fileLimitDecompressed += decompressedSectionByteLen @@ -546,7 +551,8 @@ def readKeyTextData( small_len = read_2_bytes_here(buff) # 0x2c curr_offset = buff.tell() next_lexeme_offset = curr_offset + small_len - # the resulting number must match with Contents/Body.data address of the entry + # the resulting number must match with Contents/Body.data + # address of the entry articleAddress: ArticleAddress if properties.body_has_sections: chunkOffset = readInt(buff) @@ -589,7 +595,9 @@ def readKeyTextData( # d:priority=".." 
between 0x00..0x12, priority = [0..9] priority = (priorityAndParentalControl - parentalControl) // 2 else: - log.error(f"Unknown private field: {properties.key_text_fixed_fields}") + log.error( + f"Unknown private field: {properties.key_text_fixed_fields}", + ) return {} keyTextFields: "list[str]" = [] diff --git a/pyglossary/plugins/appledict_bin/appledict_properties.py b/pyglossary/plugins/appledict_bin/appledict_properties.py index fec94011f..873ec4189 100644 --- a/pyglossary/plugins/appledict_bin/appledict_properties.py +++ b/pyglossary/plugins/appledict_bin/appledict_properties.py @@ -39,7 +39,8 @@ class AppleDictProperties: key_text_fixed_fields: list[str] # in plist file: "IDXIndexDataFields" / "IDXVariableDataFields" - # Example: ["DCSKeyword", "DCSHeadword", "DCSEntryTitle", "DCSAnchor", "DCSYomiWord"] + # Example: ["DCSKeyword", "DCSHeadword", "DCSEntryTitle", + # "DCSAnchor", "DCSYomiWord"] key_text_variable_fields: list[str] # DCSDictionaryCSS, generally "DefaultStyle.css" @@ -73,8 +74,10 @@ def from_metadata(metadata: dict) -> AppleDictProperties: external_data_fields[0].get("IDXDataSize") == 8 ) - if 'TrieAuxiliaryDataOptions' in key_text_metadata and 'HeapDataCompressionType' in \ - key_text_metadata['TrieAuxiliaryDataOptions']: + if ( + 'TrieAuxiliaryDataOptions' in key_text_metadata and + 'HeapDataCompressionType' in key_text_metadata['TrieAuxiliaryDataOptions'] + ): key_text_compression_type = \ key_text_metadata['TrieAuxiliaryDataOptions']['HeapDataCompressionType'] else: diff --git a/pyglossary/plugins/cc_cedict/__init__.py b/pyglossary/plugins/cc_cedict/__init__.py index 5f2c30e59..c229c9102 100644 --- a/pyglossary/plugins/cc_cedict/__init__.py +++ b/pyglossary/plugins/cc_cedict/__init__.py @@ -57,7 +57,7 @@ def open(self, filename: str) -> None: self._glos.sourceLangName = "Chinese" self._glos.targetLangName = "English" - self.file = open(filename, "r", encoding=self._encoding) + self.file = open(filename, encoding=self._encoding) for line in 
self.file: match = entry_count_reg.match(line) if match is not None: diff --git a/pyglossary/plugins/crawler_dir.py b/pyglossary/plugins/crawler_dir.py index f773ac1d3..31967ce22 100644 --- a/pyglossary/plugins/crawler_dir.py +++ b/pyglossary/plugins/crawler_dir.py @@ -127,7 +127,7 @@ def open(self, filename: str) -> None: self._filename = filename - with open(join(filename, "info.json"), "r", encoding="utf-8") as infoFp: + with open(join(filename, "info.json"), encoding="utf-8") as infoFp: info = jsonToOrderedData(infoFp.read()) self._wordCount = info.pop("wordCount") for key, value in info.items(): diff --git a/pyglossary/plugins/edlin.py b/pyglossary/plugins/edlin.py index 3dffa2acf..41bb4146d 100644 --- a/pyglossary/plugins/edlin.py +++ b/pyglossary/plugins/edlin.py @@ -87,7 +87,7 @@ def open(self, filename: str) -> None: ) self._filename = filename - with open(infoFname, "r", encoding=self._encoding) as infoFp: + with open(infoFname, encoding=self._encoding) as infoFp: info = jsonToOrderedData(infoFp.read()) self._wordCount = info.pop("wordCount") self._prev_link = info.pop("prev_link") @@ -121,7 +121,6 @@ def __iter__(self) -> "Iterator[EntryType]": with open( join(self._filename, nextPath), - "r", encoding=self._encoding, ) as _file: header = _file.readline().rstrip() diff --git a/pyglossary/plugins/gettext_po.py b/pyglossary/plugins/gettext_po.py index 01616653f..2f33a5b93 100644 --- a/pyglossary/plugins/gettext_po.py +++ b/pyglossary/plugins/gettext_po.py @@ -135,7 +135,7 @@ def __init__(self, glos: GlossaryType) -> None: def open(self, filename: str) -> None: self._filename = filename - self._file = _file = open(filename, mode="wt", encoding="utf-8") + self._file = _file = open(filename, mode="w", encoding="utf-8") _file.write('#\nmsgid ""\nmsgstr ""\n') for key, value in self._glos.iterInfo(): _file.write(f'"{key}: {value}\\n"\n') diff --git a/pyglossary/plugins/html_dir.py b/pyglossary/plugins/html_dir.py index 699b620a3..bb3ff2abd 100644 --- 
a/pyglossary/plugins/html_dir.py +++ b/pyglossary/plugins/html_dir.py @@ -167,7 +167,6 @@ def fixLinks(self, linkTargetSet: "set[str]") -> None: if word not in linkTargetSet: continue if word in fileByWord: - # log.info(f'fileByWord[{word}]={fileByWord[word]}, filename={filename}') fileByWord[word].append((filename, entryIndex)) else: fileByWord[word] = [(filename, entryIndex)] @@ -484,7 +483,10 @@ def addLinks(text: str, pos: int) -> None: '