diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml
new file mode 100644
index 000000000..ee6b16a11
--- /dev/null
+++ b/.github/workflows/ruff.yml
@@ -0,0 +1,31 @@
+name: "Ruff"
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    # The branches below must be a subset of the branches above
+    branches: [ master ]
+  schedule:
+    - cron: '33 1 * * 3'
+
+jobs:
+  ruff:
+    name: "See: docs.astral.sh/ruff"
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: cd and ls
+        run: |
+          cd ${{ github.workspace }}
+          ls -l
+      - name: Download ruff
+        run: |
+          wget -c https://github.com/astral-sh/ruff/releases/download/v0.0.290/ruff-x86_64-unknown-linux-gnu.tar.gz
+          tar -xzf ruff-x86_64-unknown-linux-gnu.tar.gz
+          ls -l ruff
+          chmod a+x ruff
+      - name: Run ruff
+        run: ./ruff ./pyglossary
\ No newline at end of file
diff --git a/pyglossary/glossary_v2.py b/pyglossary/glossary_v2.py
index 956f29d58..ca1d22c5e 100644
--- a/pyglossary/glossary_v2.py
+++ b/pyglossary/glossary_v2.py
@@ -120,11 +120,14 @@ class GlossaryCommon(GlossaryInfo, GlossaryProgress, PluginManager):
 
 	These methods do not exist in glossary_v2.py (but still exist in glossary.py)
 
-	- read(): you can use directRead() then iterate over glossary
+	- read():
+		you can use directRead() then iterate over glossary
 
-	- sortWords(): you have to sort entries yourself (when adding or after directRead)
+	- sortWords():
+		you have to sort entries yourself (when adding or after directRead)
 
-	- updateIter(): no longer needed, and does't do anything in glossary.py
+	- updateIter():
+		no longer needed, and does't do anything in glossary.py
 
 	"""
 
diff --git a/pyglossary/plugin_lib/readmdict.py b/pyglossary/plugin_lib/readmdict.py
index c5ee8c5ed..90d335152 100644
--- a/pyglossary/plugin_lib/readmdict.py
+++ b/pyglossary/plugin_lib/readmdict.py
@@ -285,7 +285,9 @@ def _decode_key_block_info(self, key_block_info_compressed):
 				key_block_info[i:i+self._number_width],
 			)[0]
 			i += self._number_width
-			key_block_info_list += [(key_block_compressed_size, key_block_decompressed_size)]
+			key_block_info_list.append(
+				(key_block_compressed_size, key_block_decompressed_size),
+			)
 
 		# assert num_entries == self._num_entries
 
@@ -363,7 +365,8 @@ def _read_header(self):
 		# encryption flag
 		# 0x00 - no encryption, "Allow export to text" is checked in MdxBuilder 3.
 		# 0x01 - encrypt record block, "Encryption Key" is given in MdxBuilder 3.
-		# 0x02 - encrypt key info block, "Allow export to text" is unchecked in MdxBuilder 3.
+		# 0x02 - encrypt key info block,
+		# "Allow export to text" is unchecked in MdxBuilder 3.
 		if b'Encrypted' not in header_tag or header_tag[b'Encrypted'] == b'No':
 			self._encrypt = 0
 		elif header_tag[b'Encrypted'] == b'Yes':
@@ -485,7 +488,8 @@ def _read_keys_v1v2(self):
 			adler32 = unpack('>I', f.read(4))[0]
 			assert adler32 == (zlib.adler32(block) & 0xffffffff)
 
-		# read key block info, which indicates key block's compressed and decompressed size
+		# read key block info, which indicates key block's compressed
+		# and decompressed size
 		key_block_info = f.read(key_block_info_size)
 		key_block_info_list = self._decode_key_block_info(key_block_info)
 		assert num_key_blocks == len(key_block_info_list)
@@ -572,7 +576,10 @@ def _read_records_v3(self):
 		for _ in range(num_record_blocks):
 			decompressed_size = self._read_int32(f)
 			compressed_size = self._read_int32(f)
-			record_block = self._decode_block(f.read(compressed_size), decompressed_size)
+			record_block = self._decode_block(
+				f.read(compressed_size),
+				decompressed_size,
+			)
 
 			# split record block according to the offset info from key block
 			while i < len(self._key_list):
@@ -618,7 +625,10 @@ def _read_records_v1v2(self):
 		for compressed_size, decompressed_size in record_block_info_list:
 			record_block_compressed = f.read(compressed_size)
 			try:
-				record_block = self._decode_block(record_block_compressed, decompressed_size)
+				record_block = self._decode_block(
+					record_block_compressed,
+					decompressed_size,
+				)
 			except zlib.error:
 				log.error("zlib decompress error")
 				log.debug(f"record_block_compressed = {record_block_compressed!r}")
diff --git a/pyglossary/plugin_manager.py b/pyglossary/plugin_manager.py
index fb223de53..7e80d3f09 100644
--- a/pyglossary/plugin_manager.py
+++ b/pyglossary/plugin_manager.py
@@ -291,7 +291,9 @@ def error(msg: str) -> None:
 		if not inputFilename:
 			return error(f"Invalid filename {filename!r}")  # type: ignore
 		if not plugin:
-			return error("No filename nor format is given for output file")  # type: ignore
+			return error(
+				"No filename nor format is given for output file",
+			)  # type: ignore
 		filename = splitext(inputFilename)[0] + plugin.ext
 		return DetectedFormat(filename, plugin.name, "")
 
@@ -307,7 +309,9 @@ def error(msg: str) -> None:
 			return error("Unable to detect output format!")  # type: ignore
 
 		if not plugin.canWrite:
-			return error(f"plugin {plugin.name} does not support writing")  # type: ignore
+			return error(
+				f"plugin {plugin.name} does not support writing",
+			)  # type: ignore
 
 		if compression in getattr(plugin.writerClass, "compressions", []):
 			compression = ""
diff --git a/pyglossary/plugin_prop.py b/pyglossary/plugin_prop.py
index 51529043e..e3a2014c8 100644
--- a/pyglossary/plugin_prop.py
+++ b/pyglossary/plugin_prop.py
@@ -325,7 +325,10 @@ def getOptionsFromClass(self, rwclass: "type") -> "dict[str, Any]":
 				continue
 			prop = optionsProp[name]
 			if prop.disabled:
-				core.trace(log, f"skipping disabled option {name} in {self.name} plugin")
+				core.trace(
+					log,
+					f"skipping disabled option {name} in {self.name} plugin",
+				)
 				continue
 			if not prop.validate(default):
 				log.warning(
diff --git a/pyglossary/plugins/appledict_bin/__init__.py b/pyglossary/plugins/appledict_bin/__init__.py
index db6dfd42c..03e7b6a00 100644
--- a/pyglossary/plugins/appledict_bin/__init__.py
+++ b/pyglossary/plugins/appledict_bin/__init__.py
@@ -117,7 +117,8 @@ def __init__(self, glos: GlossaryType) -> None:
 
 	def tostring(
 		self,
-		elem: "Element | HtmlComment | HtmlElement | HtmlEntity | HtmlProcessingInstruction",
+		elem: "Element | HtmlComment | HtmlElement"
+			" | HtmlEntity | HtmlProcessingInstruction",
 	) -> str:
 		from lxml.html import tostring as tostring
 
@@ -461,7 +462,9 @@ def readEntryIds(self) -> None:
 				continue
 			title_j = entryBytes.find(b'"', title_i + 9)
 			if title_j < 0:
-				log.error(f"title closing not found: {entryBytes.decode(self._encoding)}")
+				log.error(
+					f"title closing not found: {entryBytes.decode(self._encoding)}",
+				)
 				continue
 			titleById[_id] = entryBytes[title_i + 9: title_j].decode(self._encoding)
 
@@ -493,7 +496,9 @@ def setKeyTextData(
 			decompressedSectionByteLen = readInt(keyTextFile)
 			if compressedSectionByteLen == decompressedSectionByteLen == 0:
 				break
-			chunk_section_compressed = keyTextFile.read(compressedSectionByteLen - 4)
+			chunk_section_compressed = keyTextFile.read(
+				compressedSectionByteLen - 4,
+			)
 			chunksection_bytes = decompress(chunk_section_compressed )
 			buff.write(chunksection_bytes)
 			fileLimitDecompressed += decompressedSectionByteLen
@@ -546,7 +551,8 @@ def readKeyTextData(
 			small_len = read_2_bytes_here(buff)  # 0x2c
 			curr_offset = buff.tell()
 			next_lexeme_offset = curr_offset + small_len
-			# the resulting number must match with Contents/Body.data address of the entry
+			# the resulting number must match with Contents/Body.data
+			# address of the entry
 			articleAddress: ArticleAddress
 			if properties.body_has_sections:
 				chunkOffset = readInt(buff)
@@ -589,7 +595,9 @@ def readKeyTextData(
 				# d:priority=".." between 0x00..0x12, priority = [0..9]
 				priority = (priorityAndParentalControl - parentalControl) // 2
 			else:
-				log.error(f"Unknown private field: {properties.key_text_fixed_fields}")
+				log.error(
+					f"Unknown private field: {properties.key_text_fixed_fields}",
+				)
 				return {}
 
 			keyTextFields: "list[str]" = []
diff --git a/pyglossary/plugins/appledict_bin/appledict_properties.py b/pyglossary/plugins/appledict_bin/appledict_properties.py
index fec94011f..873ec4189 100644
--- a/pyglossary/plugins/appledict_bin/appledict_properties.py
+++ b/pyglossary/plugins/appledict_bin/appledict_properties.py
@@ -39,7 +39,8 @@ class AppleDictProperties:
 	key_text_fixed_fields: list[str]
 
 	# in plist file: "IDXIndexDataFields" / "IDXVariableDataFields"
-	# Example: ["DCSKeyword", "DCSHeadword", "DCSEntryTitle", "DCSAnchor", "DCSYomiWord"]
+	# Example: ["DCSKeyword", "DCSHeadword", "DCSEntryTitle",
+	# "DCSAnchor", "DCSYomiWord"]
 	key_text_variable_fields: list[str]
 
 	# DCSDictionaryCSS, generally "DefaultStyle.css"
@@ -73,8 +74,10 @@ def from_metadata(metadata: dict) -> AppleDictProperties:
 			external_data_fields[0].get("IDXDataSize") == 8
 		)
 
-		if 'TrieAuxiliaryDataOptions' in key_text_metadata and 'HeapDataCompressionType' in \
-			key_text_metadata['TrieAuxiliaryDataOptions']:
+		if (
+			'TrieAuxiliaryDataOptions' in key_text_metadata and
+			'HeapDataCompressionType' in key_text_metadata['TrieAuxiliaryDataOptions']
+		):
 			key_text_compression_type = \
 				key_text_metadata['TrieAuxiliaryDataOptions']['HeapDataCompressionType']
 		else:
diff --git a/pyglossary/plugins/cc_cedict/__init__.py b/pyglossary/plugins/cc_cedict/__init__.py
index 5f2c30e59..c229c9102 100644
--- a/pyglossary/plugins/cc_cedict/__init__.py
+++ b/pyglossary/plugins/cc_cedict/__init__.py
@@ -57,7 +57,7 @@ def open(self, filename: str) -> None:
 		self._glos.sourceLangName = "Chinese"
 		self._glos.targetLangName = "English"
 
-		self.file = open(filename, "r", encoding=self._encoding)
+		self.file = open(filename, encoding=self._encoding)
 		for line in self.file:
 			match = entry_count_reg.match(line)
 			if match is not None:
diff --git a/pyglossary/plugins/crawler_dir.py b/pyglossary/plugins/crawler_dir.py
index f773ac1d3..31967ce22 100644
--- a/pyglossary/plugins/crawler_dir.py
+++ b/pyglossary/plugins/crawler_dir.py
@@ -127,7 +127,7 @@ def open(self, filename: str) -> None:
 
 		self._filename = filename
 
-		with open(join(filename, "info.json"), "r", encoding="utf-8") as infoFp:
+		with open(join(filename, "info.json"), encoding="utf-8") as infoFp:
 			info = jsonToOrderedData(infoFp.read())
 		self._wordCount = info.pop("wordCount")
 		for key, value in info.items():
diff --git a/pyglossary/plugins/edlin.py b/pyglossary/plugins/edlin.py
index 3dffa2acf..41bb4146d 100644
--- a/pyglossary/plugins/edlin.py
+++ b/pyglossary/plugins/edlin.py
@@ -87,7 +87,7 @@ def open(self, filename: str) -> None:
 			)
 
 		self._filename = filename
-		with open(infoFname, "r", encoding=self._encoding) as infoFp:
+		with open(infoFname, encoding=self._encoding) as infoFp:
 			info = jsonToOrderedData(infoFp.read())
 		self._wordCount = info.pop("wordCount")
 		self._prev_link = info.pop("prev_link")
@@ -121,7 +121,6 @@ def __iter__(self) -> "Iterator[EntryType]":
 
 			with open(
 				join(self._filename, nextPath),
-				"r",
 				encoding=self._encoding,
 			) as _file:
 				header = _file.readline().rstrip()
diff --git a/pyglossary/plugins/gettext_po.py b/pyglossary/plugins/gettext_po.py
index 01616653f..2f33a5b93 100644
--- a/pyglossary/plugins/gettext_po.py
+++ b/pyglossary/plugins/gettext_po.py
@@ -135,7 +135,7 @@ def __init__(self, glos: GlossaryType) -> None:
 
 	def open(self, filename: str) -> None:
 		self._filename = filename
-		self._file = _file = open(filename, mode="wt", encoding="utf-8")
+		self._file = _file = open(filename, mode="w", encoding="utf-8")
 		_file.write('#\nmsgid ""\nmsgstr ""\n')
 		for key, value in self._glos.iterInfo():
 			_file.write(f'"{key}: {value}\\n"\n')
diff --git a/pyglossary/plugins/html_dir.py b/pyglossary/plugins/html_dir.py
index 699b620a3..bb3ff2abd 100644
--- a/pyglossary/plugins/html_dir.py
+++ b/pyglossary/plugins/html_dir.py
@@ -167,7 +167,6 @@ def fixLinks(self, linkTargetSet: "set[str]") -> None:
 			if word not in linkTargetSet:
 				continue
 			if word in fileByWord:
-				# log.info(f'fileByWord[{word}]={fileByWord[word]}, filename={filename}')
 				fileByWord[word].append((filename, entryIndex))
 			else:
 				fileByWord[word] = [(filename, entryIndex)]
@@ -484,7 +483,10 @@ def addLinks(text: str, pos: int) -> None:
 				'\n'
 			)
 			pos = fileObj.tell()
-			if pos > initFileSizeMax and pos > max_file_size - len(text.encode(encoding)):
+			if (
+				pos > initFileSizeMax and
+				pos > max_file_size - len(text.encode(encoding))
+			):
 				fileObj = self.nextFile()
 				fileObj.write(pageHeader(
 					len(self._filenameList) - 1,
diff --git a/pyglossary/plugins/info_plugin.py b/pyglossary/plugins/info_plugin.py
index 79689ae89..dbaa27428 100644
--- a/pyglossary/plugins/info_plugin.py
+++ b/pyglossary/plugins/info_plugin.py
@@ -38,7 +38,7 @@ def __init__(self, glos: GlossaryType) -> None:
 
 	def open(self, filename: str) -> None:
 		self._filename = filename
-		self._file = open(filename, mode="wt", encoding="utf-8")
+		self._file = open(filename, mode="w", encoding="utf-8")
 
 	def finish(self) -> None:
 		self._filename = ""
@@ -172,7 +172,7 @@ def close(self) -> None:
 	def open(self, filename: str) -> None:
 		from pyglossary.json_utils import jsonToOrderedData
 
-		with open(filename, "r", encoding="utf-8") as infoFp:
+		with open(filename, encoding="utf-8") as infoFp:
 			info = jsonToOrderedData(infoFp.read())
 		for key, value in info.items():
 			self._glos.setInfo(key, value)
diff --git a/pyglossary/plugins/sql.py b/pyglossary/plugins/sql.py
index 0ab7ffc2b..0cbcd7ffb 100644
--- a/pyglossary/plugins/sql.py
+++ b/pyglossary/plugins/sql.py
@@ -54,7 +54,7 @@ def finish(self) -> None:
 
 	def open(self, filename: str) -> None:
 		self._filename = filename
-		self._file = open(filename, "wt", encoding=self._encoding)
+		self._file = open(filename, "w", encoding=self._encoding)
 		self._writeInfo()
 
 	def _writeInfo(self) -> None:
diff --git a/pyglossary/plugins/wordnet.py b/pyglossary/plugins/wordnet.py
index d88555694..a2a846cf7 100644
--- a/pyglossary/plugins/wordnet.py
+++ b/pyglossary/plugins/wordnet.py
@@ -261,7 +261,8 @@ def a(word):
 				symbol_desc = getattr(PointerSymbols, synset.ss_type)[symbol]
 			except KeyError:
 				print(
-					f"WARNING: unknown pointer symbol {symbol} for {synset.ss_type} ",
+					f"WARNING: unknown pointer symbol {symbol}"
+					f" for {synset.ss_type} ",
 				)
 				symbol_desc = symbol
 
diff --git a/pyglossary/slob.py b/pyglossary/slob.py
index d0d593f32..d4e01b63f 100644
--- a/pyglossary/slob.py
+++ b/pyglossary/slob.py
@@ -1138,7 +1138,8 @@ def __init__(
 
 		self.current_bin: "BinMemWriter | None" = None
 
-		created_at = os.getenv("SLOB_TIMESTAMP") or datetime.now(timezone.utc).isoformat()
+		created_at = os.getenv("SLOB_TIMESTAMP") or \
+			datetime.now(timezone.utc).isoformat()
 
 		self.blob_count = 0
 		self.ref_count = 0
@@ -1310,6 +1311,14 @@ def _sort(self) -> None:
 	def _resolve_aliases(self) -> None:
 		self._fire_event('begin_resolve_aliases')
 		self.f_aliases.finalize()
+
+		def read_key_frag(item: "Blob", default_fragment: str) -> "tuple[str, str]":
+			key_frag = pickle.loads(item.content)
+			if isinstance(key_frag, str):
+				return key_frag, default_fragment
+			to_key, fragment = key_frag
+			return to_key, fragment
+
 		with MultiFileReader(
 			self.f_ref_positions.name,
 			self.f_refs.name,
@@ -1327,13 +1336,6 @@ def _resolve_aliases(self) -> None:
 				version_info=False,
 			)
 
-			def read_key_frag(item: "Blob", default_fragment: str) -> "tuple[str, str]":
-				key_frag = pickle.loads(item.content)
-				if isinstance(key_frag, str):
-					return key_frag, default_fragment
-				to_key, fragment = key_frag
-				return to_key, fragment
-
 			for item in aliasesSlob:
 				from_key = item.key
 				keys = set()
@@ -1408,6 +1410,12 @@ def finalize(self) -> None:
 
 		buf_size = 10 * 1024 * 1024
 
+		def write_tags(tags: "MappingProxyType[str, Any]", f: "StructWriter") -> None:
+			f.write(pack(U_CHAR, len(tags)))
+			for key, value in tags.items():
+				f.write_tiny_text(key)
+				f.write_tiny_text(value, editable=True)
+
 		with fopen(self.filename, mode='wb') as output_file:
 			out = StructWriter(output_file, self.encoding)
 			out.write(MAGIC)
@@ -1415,12 +1423,6 @@ def finalize(self) -> None:
 			out.write_tiny_text(self.encoding, encoding=UTF8)
 			out.write_tiny_text(self.compression)
 
-			def write_tags(tags: "MappingProxyType[str, Any]", f: "StructWriter") -> None:
-				f.write(pack(U_CHAR, len(tags)))
-				for key, value in tags.items():
-					f.write_tiny_text(key)
-					f.write_tiny_text(value, editable=True)
-
 			write_tags(self.tags, out)
 
 			def write_content_types(
diff --git a/pyglossary/ui/gtk3_utils/about.py b/pyglossary/ui/gtk3_utils/about.py
index 7066a3c97..ed7885935 100644
--- a/pyglossary/ui/gtk3_utils/about.py
+++ b/pyglossary/ui/gtk3_utils/about.py
@@ -85,7 +85,8 @@ def newTabWidgetTextView(
 	tv.set_cursor_visible(False)
 	tv.set_border_width(10)
 	buf = tv.get_buffer()
-	# buf.insert_markup(buf.get_end_iter(), markup=text, len=len(text.encode("utf-8")))
+	# buf.insert_markup(buf.get_end_iter(), markup=text,
+	# 	len=len(text.encode("utf-8")))
 	buf.set_text(text)
 	tv.show_all()
 	swin = gtk.ScrolledWindow()
diff --git a/pyglossary/ui/gtk4_utils/about.py b/pyglossary/ui/gtk4_utils/about.py
index f29245f76..62b1d332a 100644
--- a/pyglossary/ui/gtk4_utils/about.py
+++ b/pyglossary/ui/gtk4_utils/about.py
@@ -46,7 +46,8 @@ def __init__(self, title: str, icon: str) -> None:
 	# 	height = int(size * 1.5)
 	# 	return height, height
 
-	# returns: (minimum: int, natural: int, minimum_baseline: int, natural_baseline: int)
+	# returns: (minimum: int, natural: int,
+	# 	minimum_baseline: int, natural_baseline: int)
 	#def do_measure(self, orientation, for_size):
 	#	return (for_size, for_size, for_size, for_size)
 
@@ -110,7 +111,8 @@ def newTabWidgetTextView(
 	tv.set_cursor_visible(False)
 	#tv.set_border_width(10)
 	buf = tv.get_buffer()
-	# buf.insert_markup(buf.get_end_iter(), markup=text, len=len(text.encode("utf-8")))
+	# buf.insert_markup(buf.get_end_iter(), markup=text,
+	# 	len=len(text.encode("utf-8")))
 	buf.set_text(text)
 	tv.show()
 	swin = gtk.ScrolledWindow()
diff --git a/pyglossary/ui/ui_gtk.py b/pyglossary/ui/ui_gtk.py
index d9a39c1f0..7a0b327a5 100644
--- a/pyglossary/ui/ui_gtk.py
+++ b/pyglossary/ui/ui_gtk.py
@@ -1592,7 +1592,10 @@ def reverseInputEntryChanged(self, widget=None):
 			inPath = urlToPath(inPath)
 			self.reverseInputEntry.set_text(inPath)
 
-		if self.config["ui_autoSetFormat"] and not self.reverseInputFormatCombo.getActive():
+		if (
+			self.config["ui_autoSetFormat"] and
+			not self.reverseInputFormatCombo.getActive()
+		):
 			inputArgs = Glossary.detectInputFormat(inPath, quiet=True)
 			if inputArgs:
 				inFormat = inputArgs[1]
diff --git a/pyglossary/ui/ui_gtk4.py b/pyglossary/ui/ui_gtk4.py
index 09723a203..cc32ebd8e 100644
--- a/pyglossary/ui/ui_gtk4.py
+++ b/pyglossary/ui/ui_gtk4.py
@@ -1684,7 +1684,10 @@ def reverseInputEntryChanged(self, widget=None):
 			inPath = urlToPath(inPath)
 			self.reverseInputEntry.set_text(inPath)
 
-		if self.config["ui_autoSetFormat"] and not self.reverseInputFormatCombo.getActive():
+		if (
+			self.config["ui_autoSetFormat"] and
+			not self.reverseInputFormatCombo.getActive()
+		):
 			inputArgs = Glossary.detectInputFormat(inPath, quiet=True)
 			if inputArgs:
 				self.reverseInputFormatCombo.setActive(inputArgs.formatName)
diff --git a/pyproject.toml b/pyproject.toml
index 304df664b..5306f41d6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,6 +47,7 @@ ignore = [
 	"D401",  # First line of docstring should be in imperative mood
 	"D417",  # Missing argument descriptions in the docstring
 	"E402",  # Module level import not at top of file
+	"E721",  # Do not compare types, use `isinstance()`
 	"SIM105",  # Use contextlib.suppress({exception}) instead of try-except-pass
 	"SIM117",  # Use a single with statement with multiple contexts...
 	"UP009",  # UTF-8 encoding declaration is unnecessary