ruff #514

Merged · 4 commits · Sep 17, 2023
31 changes: 31 additions & 0 deletions .github/workflows/ruff.yml
@@ -0,0 +1,31 @@
name: "Ruff"

on:
push:
branches: [ master ]
pull_request:
# The branches below must be a subset of the branches above
branches: [ master ]
schedule:
- cron: '33 1 * * 3'

jobs:
ruff:
name: "See: docs.astral.sh/ruff"
runs-on: ubuntu-latest

steps:
- name: Checkout
uses: actions/checkout@v3
- name: cd and ls
run: |
cd ${{ github.workspace }}
ls -l
- name: Download ruff
run: |
wget -c https://github.com/astral-sh/ruff/releases/download/v0.0.290/ruff-x86_64-unknown-linux-gnu.tar.gz
tar -xzf ruff-x86_64-unknown-linux-gnu.tar.gz
ls -l ruff
chmod a+x ruff
- name: Run ruff
run: ./ruff ./pyglossary
9 changes: 6 additions & 3 deletions pyglossary/glossary_v2.py
@@ -120,11 +120,14 @@ class GlossaryCommon(GlossaryInfo, GlossaryProgress, PluginManager):

These methods do not exist in glossary_v2.py (but still exist in glossary.py)

- read(): you can use directRead() then iterate over glossary
- read():
you can use directRead() then iterate over glossary

- sortWords(): you have to sort entries yourself (when adding or after directRead)
- sortWords():
you have to sort entries yourself (when adding or after directRead)

- updateIter(): no longer needed, and doesn't do anything in glossary.py
- updateIter():
no longer needed, and doesn't do anything in glossary.py

"""

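Reviewer note: a minimal sketch of the migration this docstring describes, assuming the glossary_v2 API used elsewhere in this repo (directRead(), iterating over the glossary, and the entry's l_word/defi attributes); the filename and sort key are illustrative:

```python
# Hedged sketch of replacing the removed read()/sortWords()/updateIter();
# the .ifo path and the sort key below are made-up examples.
from pyglossary.glossary_v2 import Glossary

Glossary.init()
glos = Glossary()
glos.directRead("dict.ifo")  # replaces read(): load lazily, then iterate

# replaces sortWords(): sort the collected entries yourself
entries = sorted(glos, key=lambda entry: entry.l_word)
for entry in entries:
    print(entry.l_word, len(entry.defi))
```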
20 changes: 15 additions & 5 deletions pyglossary/plugin_lib/readmdict.py
@@ -285,7 +285,9 @@ def _decode_key_block_info(self, key_block_info_compressed):
key_block_info[i:i+self._number_width],
)[0]
i += self._number_width
key_block_info_list += [(key_block_compressed_size, key_block_decompressed_size)]
key_block_info_list.append(
(key_block_compressed_size, key_block_decompressed_size),
)

# assert num_entries == self._num_entries

@@ -363,7 +365,8 @@ def _read_header(self):
# encryption flag
# 0x00 - no encryption, "Allow export to text" is checked in MdxBuilder 3.
# 0x01 - encrypt record block, "Encryption Key" is given in MdxBuilder 3.
# 0x02 - encrypt key info block, "Allow export to text" is unchecked in MdxBuilder 3.
# 0x02 - encrypt key info block,
# "Allow export to text" is unchecked in MdxBuilder 3.
if b'Encrypted' not in header_tag or header_tag[b'Encrypted'] == b'No':
self._encrypt = 0
elif header_tag[b'Encrypted'] == b'Yes':
@@ -485,7 +488,8 @@ def _read_keys_v1v2(self):
adler32 = unpack('>I', f.read(4))[0]
assert adler32 == (zlib.adler32(block) & 0xffffffff)

# read key block info, which indicates key block's compressed and decompressed size
# read key block info, which indicates key block's compressed
# and decompressed size
key_block_info = f.read(key_block_info_size)
key_block_info_list = self._decode_key_block_info(key_block_info)
assert num_key_blocks == len(key_block_info_list)
@@ -572,7 +576,10 @@ def _read_records_v3(self):
for _ in range(num_record_blocks):
decompressed_size = self._read_int32(f)
compressed_size = self._read_int32(f)
record_block = self._decode_block(f.read(compressed_size), decompressed_size)
record_block = self._decode_block(
f.read(compressed_size),
decompressed_size,
)

# split record block according to the offset info from key block
while i < len(self._key_list):
@@ -618,7 +625,10 @@ def _read_records_v1v2(self):
for compressed_size, decompressed_size in record_block_info_list:
record_block_compressed = f.read(compressed_size)
try:
record_block = self._decode_block(record_block_compressed, decompressed_size)
record_block = self._decode_block(
record_block_compressed,
decompressed_size,
)
except zlib.error:
log.error("zlib decompress error")
log.debug(f"record_block_compressed = {record_block_compressed!r}")
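Reviewer note: for readers of these hunks, a hedged sketch of the zlib block layout implied by the comments and the adler32 asserts (a 4-byte type tag, a 4-byte adler32 of the decompressed data, then the zlib stream); this helper is illustrative and is not the module's _decode_block():

```python
import zlib
from struct import unpack

def decode_zlib_block(data: bytes, decompressed_size: int) -> bytes:
    # bytes 0..3: compression-type tag; b"\x02\x00\x00\x00" means zlib
    assert data[:4] == b"\x02\x00\x00\x00"
    # bytes 4..7: adler32 checksum of the *decompressed* block
    adler32 = unpack(">I", data[4:8])[0]
    block = zlib.decompress(data[8:])
    assert len(block) == decompressed_size
    assert zlib.adler32(block) & 0xffffffff == adler32
    return block
```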
8 changes: 6 additions & 2 deletions pyglossary/plugin_manager.py
@@ -291,7 +291,9 @@ def error(msg: str) -> None:
if not inputFilename:
return error(f"Invalid filename {filename!r}") # type: ignore
if not plugin:
return error("No filename nor format is given for output file") # type: ignore
return error(
"No filename nor format is given for output file",
) # type: ignore
filename = splitext(inputFilename)[0] + plugin.ext
return DetectedFormat(filename, plugin.name, "")

@@ -307,7 +309,9 @@ def error(msg: str) -> None:
return error("Unable to detect output format!") # type: ignore

if not plugin.canWrite:
return error(f"plugin {plugin.name} does not support writing") # type: ignore
return error(
f"plugin {plugin.name} does not support writing",
) # type: ignore

if compression in getattr(plugin.writerClass, "compressions", []):
compression = ""
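Reviewer note: a hedged caller-side sketch of the error() pattern these hunks rewrap; error() logs the message and returns None, so detection results must be None-checked. The function name, keyword arguments, and paths here are assumptions for illustration, not the exact API:

```python
import sys

# Hypothetical usage; DetectedFormat(filename, formatName, compression)
# comes from the surrounding code, everything else is illustrative.
res = detectOutputFormat(filename="dict.txt", inputFilename="dict.ifo")
if res is None:
    sys.exit(1)  # the reason was already logged by error()
filename, formatName, compression = res
```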
5 changes: 4 additions & 1 deletion pyglossary/plugin_prop.py
@@ -325,7 +325,10 @@ def getOptionsFromClass(self, rwclass: "type") -> "dict[str, Any]":
continue
prop = optionsProp[name]
if prop.disabled:
core.trace(log, f"skipping disabled option {name} in {self.name} plugin")
core.trace(
log,
f"skipping disabled option {name} in {self.name} plugin",
)
continue
if not prop.validate(default):
log.warning(
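Reviewer note: a condensed, hedged restatement of the loop this hunk rewraps (skip disabled options, drop defaults that fail validation); defaultsByName stands in for the surrounding method's locals and is an assumption:

```python
# Illustrative reduction of getOptionsFromClass(); not the actual method.
options = {}
for name, default in defaultsByName.items():  # assumed: option name -> default
    prop = optionsProp.get(name)
    if prop is None:
        continue
    if prop.disabled:
        continue  # traced and skipped, as in the hunk above
    if not prop.validate(default):
        log.warning(f"invalid default: {name}={default!r}")
        continue
    options[name] = default
```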
18 changes: 13 additions & 5 deletions pyglossary/plugins/appledict_bin/__init__.py
@@ -117,7 +117,8 @@ def __init__(self, glos: GlossaryType) -> None:

def tostring(
self,
elem: "Element | HtmlComment | HtmlElement | HtmlEntity | HtmlProcessingInstruction",
elem: "Element | HtmlComment | HtmlElement"
" | HtmlEntity | HtmlProcessingInstruction",
) -> str:
from lxml.html import tostring as tostring

@@ -461,7 +462,9 @@ def readEntryIds(self) -> None:
continue
title_j = entryBytes.find(b'"', title_i + 9)
if title_j < 0:
log.error(f"title closing not found: {entryBytes.decode(self._encoding)}")
log.error(
f"title closing not found: {entryBytes.decode(self._encoding)}",
)
continue
titleById[_id] = entryBytes[title_i + 9: title_j].decode(self._encoding)

@@ -493,7 +496,9 @@ def setKeyTextData(
decompressedSectionByteLen = readInt(keyTextFile)
if compressedSectionByteLen == decompressedSectionByteLen == 0:
break
chunk_section_compressed = keyTextFile.read(compressedSectionByteLen - 4)
chunk_section_compressed = keyTextFile.read(
compressedSectionByteLen - 4,
)
chunksection_bytes = decompress(chunk_section_compressed)
buff.write(chunksection_bytes)
fileLimitDecompressed += decompressedSectionByteLen
@@ -546,7 +551,8 @@ def readKeyTextData(
small_len = read_2_bytes_here(buff) # 0x2c
curr_offset = buff.tell()
next_lexeme_offset = curr_offset + small_len
# the resulting number must match with Contents/Body.data address of the entry
# the resulting number must match with Contents/Body.data
# address of the entry
articleAddress: ArticleAddress
if properties.body_has_sections:
chunkOffset = readInt(buff)
@@ -589,7 +595,9 @@ def readKeyTextData(
# d:priority=".." between 0x00..0x12, priority = [0..9]
priority = (priorityAndParentalControl - parentalControl) // 2
else:
log.error(f"Unknown private field: {properties.key_text_fixed_fields}")
log.error(
f"Unknown private field: {properties.key_text_fixed_fields}",
)
return {}

keyTextFields: "list[str]" = []
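Reviewer note: a hedged sketch of the KeyText.data section loop that setKeyTextData() implements around these hunks; each section is two 32-bit lengths followed by a zlib payload, and a zero/zero pair ends the list. readInt() is restated here as an assumed big-endian reader:

```python
from io import BytesIO
from struct import unpack
from zlib import decompress

def readInt(f) -> int:
    # assumed equivalent of the plugin's readInt(): big-endian uint32
    return unpack(">I", f.read(4))[0]

def readAllSections(keyTextFile) -> bytes:
    buff = BytesIO()
    while True:
        compLen = readInt(keyTextFile)
        decompLen = readInt(keyTextFile)
        if compLen == decompLen == 0:
            break  # zero/zero sentinel ends the section list
        # the stored compressed length counts a 4-byte prefix, hence
        # the `compressedSectionByteLen - 4` read in the hunk above
        data = decompress(keyTextFile.read(compLen - 4))
        assert len(data) == decompLen
        buff.write(data)
    return buff.getvalue()
```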
9 changes: 6 additions & 3 deletions pyglossary/plugins/appledict_bin/appledict_properties.py
@@ -39,7 +39,8 @@ class AppleDictProperties:
key_text_fixed_fields: list[str]

# in plist file: "IDXIndexDataFields" / "IDXVariableDataFields"
# Example: ["DCSKeyword", "DCSHeadword", "DCSEntryTitle", "DCSAnchor", "DCSYomiWord"]
# Example: ["DCSKeyword", "DCSHeadword", "DCSEntryTitle",
# "DCSAnchor", "DCSYomiWord"]
key_text_variable_fields: list[str]

# DCSDictionaryCSS, generally "DefaultStyle.css"
@@ -73,8 +74,10 @@ def from_metadata(metadata: dict) -> AppleDictProperties:
external_data_fields[0].get("IDXDataSize") == 8
)

if 'TrieAuxiliaryDataOptions' in key_text_metadata and 'HeapDataCompressionType' in \
key_text_metadata['TrieAuxiliaryDataOptions']:
if (
'TrieAuxiliaryDataOptions' in key_text_metadata and
'HeapDataCompressionType' in key_text_metadata['TrieAuxiliaryDataOptions']
):
key_text_compression_type = \
key_text_metadata['TrieAuxiliaryDataOptions']['HeapDataCompressionType']
else:
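Reviewer note: the rewrapped membership test could also be flattened with dict.get(); a hedged alternative that assumes TrieAuxiliaryDataOptions, when present, is always a dict (the elided else-branch default is left untouched):

```python
# Possible simplification; behavior assumed equivalent.
aux = key_text_metadata.get("TrieAuxiliaryDataOptions", {})
if "HeapDataCompressionType" in aux:
    key_text_compression_type = aux["HeapDataCompressionType"]
```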
2 changes: 1 addition & 1 deletion pyglossary/plugins/cc_cedict/__init__.py
@@ -57,7 +57,7 @@ def open(self, filename: str) -> None:
self._glos.sourceLangName = "Chinese"
self._glos.targetLangName = "English"

self.file = open(filename, "r", encoding=self._encoding)
self.file = open(filename, encoding=self._encoding)
for line in self.file:
match = entry_count_reg.match(line)
if match is not None:
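Reviewer note: this hunk and the similar ones below (crawler_dir.py, edlin.py, info_plugin.py, and the "wt" → "w" changes) drop redundant open() mode flags; "r" (read) and "t" (text) are the builtin defaults, so these calls are equivalent:

```python
# All three open the same file the same way: read-only, text mode.
f1 = open("info.json", "rt", encoding="utf-8")
f2 = open("info.json", "r", encoding="utf-8")
f3 = open("info.json", encoding="utf-8")

# Likewise for writing: "wt" and "w" are identical, text being the default.
g1 = open("out.po", mode="wt", encoding="utf-8")
g2 = open("out.po", mode="w", encoding="utf-8")
```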
2 changes: 1 addition & 1 deletion pyglossary/plugins/crawler_dir.py
@@ -127,7 +127,7 @@ def open(self, filename: str) -> None:

self._filename = filename

with open(join(filename, "info.json"), "r", encoding="utf-8") as infoFp:
with open(join(filename, "info.json"), encoding="utf-8") as infoFp:
info = jsonToOrderedData(infoFp.read())
self._wordCount = info.pop("wordCount")
for key, value in info.items():
3 changes: 1 addition & 2 deletions pyglossary/plugins/edlin.py
@@ -87,7 +87,7 @@ def open(self, filename: str) -> None:
)
self._filename = filename

with open(infoFname, "r", encoding=self._encoding) as infoFp:
with open(infoFname, encoding=self._encoding) as infoFp:
info = jsonToOrderedData(infoFp.read())
self._wordCount = info.pop("wordCount")
self._prev_link = info.pop("prev_link")
@@ -121,7 +121,6 @@ def __iter__(self) -> "Iterator[EntryType]":

with open(
join(self._filename, nextPath),
"r",
encoding=self._encoding,
) as _file:
header = _file.readline().rstrip()
2 changes: 1 addition & 1 deletion pyglossary/plugins/gettext_po.py
@@ -135,7 +135,7 @@ def __init__(self, glos: GlossaryType) -> None:

def open(self, filename: str) -> None:
self._filename = filename
self._file = _file = open(filename, mode="wt", encoding="utf-8")
self._file = _file = open(filename, mode="w", encoding="utf-8")
_file.write('#\nmsgid ""\nmsgstr ""\n')
for key, value in self._glos.iterInfo():
_file.write(f'"{key}: {value}\\n"\n')
6 changes: 4 additions & 2 deletions pyglossary/plugins/html_dir.py
@@ -167,7 +167,6 @@ def fixLinks(self, linkTargetSet: "set[str]") -> None:
if word not in linkTargetSet:
continue
if word in fileByWord:
# log.info(f'fileByWord[{word}]={fileByWord[word]}, filename={filename}')
fileByWord[word].append((filename, entryIndex))
else:
fileByWord[word] = [(filename, entryIndex)]
@@ -484,7 +483,10 @@ def addLinks(text: str, pos: int) -> None:
'<hr>\n'
)
pos = fileObj.tell()
if pos > initFileSizeMax and pos > max_file_size - len(text.encode(encoding)):
if (
pos > initFileSizeMax and
pos > max_file_size - len(text.encode(encoding))
):
fileObj = self.nextFile()
fileObj.write(pageHeader(
len(self._filenameList) - 1,
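Reviewer note: a compact, hedged restatement of the rollover condition rewrapped above; initFileSizeMax, max_file_size, and encoding are the writer's locals, restated here as assumptions:

```python
# Equivalent predicate: rotate to a new output file once the current one
# passes the initial-size threshold AND the next entry would overflow
# max_file_size (pos > max_file_size - n  <=>  pos + n > max_file_size).
def needsNewFile(pos: int, text: str) -> bool:
    return (
        pos > initFileSizeMax
        and pos + len(text.encode(encoding)) > max_file_size
    )
```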
4 changes: 2 additions & 2 deletions pyglossary/plugins/info_plugin.py
@@ -38,7 +38,7 @@ def __init__(self, glos: GlossaryType) -> None:

def open(self, filename: str) -> None:
self._filename = filename
self._file = open(filename, mode="wt", encoding="utf-8")
self._file = open(filename, mode="w", encoding="utf-8")

def finish(self) -> None:
self._filename = ""
@@ -172,7 +172,7 @@ def close(self) -> None:
def open(self, filename: str) -> None:
from pyglossary.json_utils import jsonToOrderedData

with open(filename, "r", encoding="utf-8") as infoFp:
with open(filename, encoding="utf-8") as infoFp:
info = jsonToOrderedData(infoFp.read())
for key, value in info.items():
self._glos.setInfo(key, value)
2 changes: 1 addition & 1 deletion pyglossary/plugins/sql.py
@@ -54,7 +54,7 @@ def finish(self) -> None:

def open(self, filename: str) -> None:
self._filename = filename
self._file = open(filename, "wt", encoding=self._encoding)
self._file = open(filename, "w", encoding=self._encoding)
self._writeInfo()

def _writeInfo(self) -> None:
3 changes: 2 additions & 1 deletion pyglossary/plugins/wordnet.py
@@ -261,7 +261,8 @@ def a(word):
symbol_desc = getattr(PointerSymbols, synset.ss_type)[symbol]
except KeyError:
print(
f"WARNING: unknown pointer symbol {symbol} for {synset.ss_type} ",
f"WARNING: unknown pointer symbol {symbol}"
f" for {synset.ss_type} ",
)
symbol_desc = symbol

30 changes: 16 additions & 14 deletions pyglossary/slob.py
@@ -1138,7 +1138,8 @@ def __init__(

self.current_bin: "BinMemWriter | None" = None

created_at = os.getenv("SLOB_TIMESTAMP") or datetime.now(timezone.utc).isoformat()
created_at = os.getenv("SLOB_TIMESTAMP") or \
datetime.now(timezone.utc).isoformat()

self.blob_count = 0
self.ref_count = 0
@@ -1310,6 +1311,14 @@ def _sort(self) -> None:
def _resolve_aliases(self) -> None:
self._fire_event('begin_resolve_aliases')
self.f_aliases.finalize()

def read_key_frag(item: "Blob", default_fragment: str) -> "tuple[str, str]":
key_frag = pickle.loads(item.content)
if isinstance(key_frag, str):
return key_frag, default_fragment
to_key, fragment = key_frag
return to_key, fragment

with MultiFileReader(
self.f_ref_positions.name,
self.f_refs.name,
@@ -1327,13 +1336,6 @@ def _resolve_aliases(self) -> None:
version_info=False,
)

def read_key_frag(item: "Blob", default_fragment: str) -> "tuple[str, str]":
key_frag = pickle.loads(item.content)
if isinstance(key_frag, str):
return key_frag, default_fragment
to_key, fragment = key_frag
return to_key, fragment

for item in aliasesSlob:
from_key = item.key
keys = set()
@@ -1408,19 +1410,19 @@ def finalize(self) -> None:

buf_size = 10 * 1024 * 1024

def write_tags(tags: "MappingProxyType[str, Any]", f: "StructWriter") -> None:
f.write(pack(U_CHAR, len(tags)))
for key, value in tags.items():
f.write_tiny_text(key)
f.write_tiny_text(value, editable=True)

with fopen(self.filename, mode='wb') as output_file:
out = StructWriter(output_file, self.encoding)
out.write(MAGIC)
out.write(uuid4().bytes)
out.write_tiny_text(self.encoding, encoding=UTF8)
out.write_tiny_text(self.compression)

def write_tags(tags: "MappingProxyType[str, Any]", f: "StructWriter") -> None:
f.write(pack(U_CHAR, len(tags)))
for key, value in tags.items():
f.write_tiny_text(key)
f.write_tiny_text(value, editable=True)

write_tags(self.tags, out)

def write_content_types(
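Reviewer note: the hoisted read_key_frag() accepts two pickled payload shapes; a small, hedged demonstration with made-up values:

```python
import pickle

# An alias blob's content is either a pickled str (key only) or a pickled
# (key, fragment) tuple; read_key_frag() normalizes both to (key, fragment).
for content in (pickle.dumps("colour"), pickle.dumps(("colour", "british"))):
    key_frag = pickle.loads(content)
    if isinstance(key_frag, str):
        print((key_frag, ""))  # default fragment
    else:
        print(tuple(key_frag))
```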
3 changes: 2 additions & 1 deletion pyglossary/ui/gtk3_utils/about.py
@@ -85,7 +85,8 @@ def newTabWidgetTextView(
tv.set_cursor_visible(False)
tv.set_border_width(10)
buf = tv.get_buffer()
# buf.insert_markup(buf.get_end_iter(), markup=text, len=len(text.encode("utf-8")))
# buf.insert_markup(buf.get_end_iter(), markup=text,
# len=len(text.encode("utf-8")))
buf.set_text(text)
tv.show_all()
swin = gtk.ScrolledWindow()