ruff #514

Merged · 4 commits · Sep 17, 2023
31 changes: 31 additions & 0 deletions .github/workflows/ruff.yml
@@ -0,0 +1,31 @@
name: "Ruff"

on:
push:
branches: [ master ]
pull_request:
# The branches below must be a subset of the branches above
branches: [ master ]
schedule:
- cron: '33 1 * * 3'

jobs:
ruff:
name: "See: docs.astral.sh/ruff"
runs-on: ubuntu-latest

steps:
- name: Checkout
uses: actions/checkout@v3
- name: cd and ls
run: |
cd ${{ github.workspace }}
ls -l
- name: Download ruff
run: |
wget -c https://github.com/astral-sh/ruff/releases/download/v0.0.290/ruff-x86_64-unknown-linux-gnu.tar.gz
tar -xzf ruff-x86_64-unknown-linux-gnu.tar.gz
ls -l ruff
chmod a+x ruff
- name: Run ruff
run: ./ruff ./pyglossary
9 changes: 6 additions & 3 deletions pyglossary/glossary_v2.py
@@ -120,11 +120,14 @@ class GlossaryCommon(GlossaryInfo, GlossaryProgress, PluginManager):

These methods do not exist in glossary_v2.py (but still exist in glossary.py)

- read(): you can use directRead() then iterate over glossary
- read():
you can use directRead() then iterate over glossary

- sortWords(): you have to sort entries yourself (when adding or after directRead)
- sortWords():
you have to sort entries yourself (when adding or after directRead)

- updateIter(): no longer needed, and doesn't do anything in glossary.py
- updateIter():
no longer needed, and doesn't do anything in glossary.py

"""

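Reviewer note: a minimal sketch of the migration this docstring describes, assuming the glossary_v2 API used elsewhere in this repo (directRead(), iterating over the glossary, and the entry's l_word/defi attributes); the filename and sort key are illustrative:

```python
# Hedged sketch of replacing the removed read()/sortWords()/updateIter();
# the .ifo path and the sort key below are made-up examples.
from pyglossary.glossary_v2 import Glossary

Glossary.init()
glos = Glossary()
glos.directRead("dict.ifo")  # replaces read(): load lazily, then iterate

# replaces sortWords(): sort the collected entries yourself
entries = sorted(glos, key=lambda entry: entry.l_word)
for entry in entries:
    print(entry.l_word, len(entry.defi))
```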
20 changes: 15 additions & 5 deletions pyglossary/plugin_lib/readmdict.py
@@ -285,7 +285,9 @@ def _decode_key_block_info(self, key_block_info_compressed):
key_block_info[i:i+self._number_width],
)[0]
i += self._number_width
key_block_info_list += [(key_block_compressed_size, key_block_decompressed_size)]
key_block_info_list.append(
(key_block_compressed_size, key_block_decompressed_size),
)

# assert num_entries == self._num_entries

@@ -363,7 +365,8 @@ def _read_header(self):
# encryption flag
# 0x00 - no encryption, "Allow export to text" is checked in MdxBuilder 3.
# 0x01 - encrypt record block, "Encryption Key" is given in MdxBuilder 3.
# 0x02 - encrypt key info block, "Allow export to text" is unchecked in MdxBuilder 3.
# 0x02 - encrypt key info block,
# "Allow export to text" is unchecked in MdxBuilder 3.
if b'Encrypted' not in header_tag or header_tag[b'Encrypted'] == b'No':
self._encrypt = 0
elif header_tag[b'Encrypted'] == b'Yes':
@@ -485,7 +488,8 @@ def _read_keys_v1v2(self):
adler32 = unpack('>I', f.read(4))[0]
assert adler32 == (zlib.adler32(block) & 0xffffffff)

# read key block info, which indicates key block's compressed and decompressed size
# read key block info, which indicates key block's compressed
# and decompressed size
key_block_info = f.read(key_block_info_size)
key_block_info_list = self._decode_key_block_info(key_block_info)
assert num_key_blocks == len(key_block_info_list)
@@ -572,7 +576,10 @@ def _read_records_v3(self):
for _ in range(num_record_blocks):
decompressed_size = self._read_int32(f)
compressed_size = self._read_int32(f)
record_block = self._decode_block(f.read(compressed_size), decompressed_size)
record_block = self._decode_block(
f.read(compressed_size),
decompressed_size,
)

# split record block according to the offset info from key block
while i < len(self._key_list):
@@ -618,7 +625,10 @@ def _read_records_v1v2(self):
for compressed_size, decompressed_size in record_block_info_list:
record_block_compressed = f.read(compressed_size)
try:
record_block = self._decode_block(record_block_compressed, decompressed_size)
record_block = self._decode_block(
record_block_compressed,
decompressed_size,
)
except zlib.error:
log.error("zlib decompress error")
log.debug(f"record_block_compressed = {record_block_compressed!r}")
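Reviewer note: for readers of these hunks, a hedged sketch of the zlib block layout implied by the comments and the adler32 asserts (a 4-byte type tag, a 4-byte adler32 of the decompressed data, then the zlib stream); this helper is illustrative and is not the module's _decode_block():

```python
import zlib
from struct import unpack

def decode_zlib_block(data: bytes, decompressed_size: int) -> bytes:
    # bytes 0..3: compression-type tag; b"\x02\x00\x00\x00" means zlib
    assert data[:4] == b"\x02\x00\x00\x00"
    # bytes 4..7: adler32 checksum of the *decompressed* block
    adler32 = unpack(">I", data[4:8])[0]
    block = zlib.decompress(data[8:])
    assert len(block) == decompressed_size
    assert zlib.adler32(block) & 0xffffffff == adler32
    return block
```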
8 changes: 6 additions & 2 deletions pyglossary/plugin_manager.py
@@ -291,7 +291,9 @@ def error(msg: str) -> None:
if not inputFilename:
return error(f"Invalid filename {filename!r}") # type: ignore
if not plugin:
return error("No filename nor format is given for output file") # type: ignore
return error(
"No filename nor format is given for output file",
) # type: ignore
filename = splitext(inputFilename)[0] + plugin.ext
return DetectedFormat(filename, plugin.name, "")

@@ -307,7 +309,9 @@ def error(msg: str) -> None:
return error("Unable to detect output format!") # type: ignore

if not plugin.canWrite:
return error(f"plugin {plugin.name} does not support writing") # type: ignore
return error(
f"plugin {plugin.name} does not support writing",
) # type: ignore

if compression in getattr(plugin.writerClass, "compressions", []):
compression = ""
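Reviewer note: a hedged caller-side sketch of the error() pattern these hunks rewrap; error() logs the message and returns None, so detection results must be None-checked. The function name, keyword arguments, and paths here are assumptions for illustration, not the exact API:

```python
import sys

# Hypothetical usage; DetectedFormat(filename, formatName, compression)
# comes from the surrounding code, everything else is illustrative.
res = detectOutputFormat(filename="dict.txt", inputFilename="dict.ifo")
if res is None:
    sys.exit(1)  # the reason was already logged by error()
filename, formatName, compression = res
```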
5 changes: 4 additions & 1 deletion pyglossary/plugin_prop.py
@@ -325,7 +325,10 @@ def getOptionsFromClass(self, rwclass: "type") -> "dict[str, Any]":
continue
prop = optionsProp[name]
if prop.disabled:
core.trace(log, f"skipping disabled option {name} in {self.name} plugin")
core.trace(
log,
f"skipping disabled option {name} in {self.name} plugin",
)
continue
if not prop.validate(default):
log.warning(
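Reviewer note: a condensed, hedged restatement of the loop this hunk rewraps (skip disabled options, drop defaults that fail validation); defaultsByName stands in for the surrounding method's locals and is an assumption:

```python
# Illustrative reduction of getOptionsFromClass(); not the actual method.
options = {}
for name, default in defaultsByName.items():  # assumed: option name -> default
    prop = optionsProp.get(name)
    if prop is None:
        continue
    if prop.disabled:
        continue  # traced and skipped, as in the hunk above
    if not prop.validate(default):
        log.warning(f"invalid default: {name}={default!r}")
        continue
    options[name] = default
```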
18 changes: 13 additions & 5 deletions pyglossary/plugins/appledict_bin/__init__.py
@@ -117,7 +117,8 @@ def __init__(self, glos: GlossaryType) -> None:

def tostring(
self,
elem: "Element | HtmlComment | HtmlElement | HtmlEntity | HtmlProcessingInstruction",
elem: "Element | HtmlComment | HtmlElement"
" | HtmlEntity | HtmlProcessingInstruction",
) -> str:
from lxml.html import tostring as tostring

@@ -461,7 +462,9 @@ def readEntryIds(self) -> None:
continue
title_j = entryBytes.find(b'"', title_i + 9)
if title_j < 0:
log.error(f"title closing not found: {entryBytes.decode(self._encoding)}")
log.error(
f"title closing not found: {entryBytes.decode(self._encoding)}",
)
continue
titleById[_id] = entryBytes[title_i + 9: title_j].decode(self._encoding)

@@ -493,7 +496,9 @@ def setKeyTextData(
decompressedSectionByteLen = readInt(keyTextFile)
if compressedSectionByteLen == decompressedSectionByteLen == 0:
break
chunk_section_compressed = keyTextFile.read(compressedSectionByteLen - 4)
chunk_section_compressed = keyTextFile.read(
compressedSectionByteLen - 4,
)
chunksection_bytes = decompress(chunk_section_compressed)
buff.write(chunksection_bytes)
fileLimitDecompressed += decompressedSectionByteLen
@@ -546,7 +551,8 @@ def readKeyTextData(
small_len = read_2_bytes_here(buff) # 0x2c
curr_offset = buff.tell()
next_lexeme_offset = curr_offset + small_len
# the resulting number must match with Contents/Body.data address of the entry
# the resulting number must match with Contents/Body.data
# address of the entry
articleAddress: ArticleAddress
if properties.body_has_sections:
chunkOffset = readInt(buff)
@@ -589,7 +595,9 @@ def readKeyTextData(
# d:priority=".." between 0x00..0x12, priority = [0..9]
priority = (priorityAndParentalControl - parentalControl) // 2
else:
log.error(f"Unknown private field: {properties.key_text_fixed_fields}")
log.error(
f"Unknown private field: {properties.key_text_fixed_fields}",
)
return {}

keyTextFields: "list[str]" = []
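Reviewer note: a hedged sketch of the KeyText.data section loop that setKeyTextData() implements around these hunks; each section is two 32-bit lengths followed by a zlib payload, and a zero/zero pair ends the list. readInt() is restated here as an assumed big-endian reader:

```python
from io import BytesIO
from struct import unpack
from zlib import decompress

def readInt(f) -> int:
    # assumed equivalent of the plugin's readInt(): big-endian uint32
    return unpack(">I", f.read(4))[0]

def readAllSections(keyTextFile) -> bytes:
    buff = BytesIO()
    while True:
        compLen = readInt(keyTextFile)
        decompLen = readInt(keyTextFile)
        if compLen == decompLen == 0:
            break  # zero/zero sentinel ends the section list
        # the stored compressed length counts a 4-byte prefix, hence
        # the `compressedSectionByteLen - 4` read in the hunk above
        data = decompress(keyTextFile.read(compLen - 4))
        assert len(data) == decompLen
        buff.write(data)
    return buff.getvalue()
```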
9 changes: 6 additions & 3 deletions pyglossary/plugins/appledict_bin/appledict_properties.py
@@ -39,7 +39,8 @@ class AppleDictProperties:
key_text_fixed_fields: list[str]

# in plist file: "IDXIndexDataFields" / "IDXVariableDataFields"
# Example: ["DCSKeyword", "DCSHeadword", "DCSEntryTitle", "DCSAnchor", "DCSYomiWord"]
# Example: ["DCSKeyword", "DCSHeadword", "DCSEntryTitle",
# "DCSAnchor", "DCSYomiWord"]
key_text_variable_fields: list[str]

# DCSDictionaryCSS, generally "DefaultStyle.css"
@@ -73,8 +74,10 @@ def from_metadata(metadata: dict) -> AppleDictProperties:
external_data_fields[0].get("IDXDataSize") == 8
)

if 'TrieAuxiliaryDataOptions' in key_text_metadata and 'HeapDataCompressionType' in \
key_text_metadata['TrieAuxiliaryDataOptions']:
if (
'TrieAuxiliaryDataOptions' in key_text_metadata and
'HeapDataCompressionType' in key_text_metadata['TrieAuxiliaryDataOptions']
):
key_text_compression_type = \
key_text_metadata['TrieAuxiliaryDataOptions']['HeapDataCompressionType']
else:
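Reviewer note: the rewrapped membership test could also be flattened with dict.get(); a hedged alternative that assumes TrieAuxiliaryDataOptions, when present, is always a dict (the elided else-branch default is left untouched):

```python
# Possible simplification; behavior assumed equivalent.
aux = key_text_metadata.get("TrieAuxiliaryDataOptions", {})
if "HeapDataCompressionType" in aux:
    key_text_compression_type = aux["HeapDataCompressionType"]
```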
2 changes: 1 addition & 1 deletion pyglossary/plugins/cc_cedict/__init__.py
@@ -57,7 +57,7 @@ def open(self, filename: str) -> None:
self._glos.sourceLangName = "Chinese"
self._glos.targetLangName = "English"

self.file = open(filename, "r", encoding=self._encoding)
self.file = open(filename, encoding=self._encoding)
for line in self.file:
match = entry_count_reg.match(line)
if match is not None:
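Reviewer note: this hunk and the similar ones below (crawler_dir.py, edlin.py, info_plugin.py, and the "wt" → "w" changes) drop redundant open() mode flags; "r" (read) and "t" (text) are the builtin defaults, so these calls are equivalent:

```python
# All three open the same file the same way: read-only, text mode.
f1 = open("info.json", "rt", encoding="utf-8")
f2 = open("info.json", "r", encoding="utf-8")
f3 = open("info.json", encoding="utf-8")

# Likewise for writing: "wt" and "w" are identical, text being the default.
g1 = open("out.po", mode="wt", encoding="utf-8")
g2 = open("out.po", mode="w", encoding="utf-8")
```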
2 changes: 1 addition & 1 deletion pyglossary/plugins/crawler_dir.py
@@ -127,7 +127,7 @@ def open(self, filename: str) -> None:

self._filename = filename

with open(join(filename, "info.json"), "r", encoding="utf-8") as infoFp:
with open(join(filename, "info.json"), encoding="utf-8") as infoFp:
info = jsonToOrderedData(infoFp.read())
self._wordCount = info.pop("wordCount")
for key, value in info.items():
3 changes: 1 addition & 2 deletions pyglossary/plugins/edlin.py
@@ -87,7 +87,7 @@ def open(self, filename: str) -> None:
)
self._filename = filename

with open(infoFname, "r", encoding=self._encoding) as infoFp:
with open(infoFname, encoding=self._encoding) as infoFp:
info = jsonToOrderedData(infoFp.read())
self._wordCount = info.pop("wordCount")
self._prev_link = info.pop("prev_link")
@@ -121,7 +121,6 @@ def __iter__(self) -> "Iterator[EntryType]":

with open(
join(self._filename, nextPath),
"r",
encoding=self._encoding,
) as _file:
header = _file.readline().rstrip()
2 changes: 1 addition & 1 deletion pyglossary/plugins/gettext_po.py
@@ -135,7 +135,7 @@ def __init__(self, glos: GlossaryType) -> None:

def open(self, filename: str) -> None:
self._filename = filename
self._file = _file = open(filename, mode="wt", encoding="utf-8")
self._file = _file = open(filename, mode="w", encoding="utf-8")
_file.write('#\nmsgid ""\nmsgstr ""\n')
for key, value in self._glos.iterInfo():
_file.write(f'"{key}: {value}\\n"\n')
6 changes: 4 additions & 2 deletions pyglossary/plugins/html_dir.py
@@ -167,7 +167,6 @@ def fixLinks(self, linkTargetSet: "set[str]") -> None:
if word not in linkTargetSet:
continue
if word in fileByWord:
# log.info(f'fileByWord[{word}]={fileByWord[word]}, filename={filename}')
fileByWord[word].append((filename, entryIndex))
else:
fileByWord[word] = [(filename, entryIndex)]
@@ -484,7 +483,10 @@ def addLinks(text: str, pos: int) -> None:
'<hr>\n'
)
pos = fileObj.tell()
if pos > initFileSizeMax and pos > max_file_size - len(text.encode(encoding)):
if (
pos > initFileSizeMax and
pos > max_file_size - len(text.encode(encoding))
):
fileObj = self.nextFile()
fileObj.write(pageHeader(
len(self._filenameList) - 1,
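Reviewer note: a compact, hedged restatement of the rollover condition rewrapped above; initFileSizeMax, max_file_size, and encoding are the writer's locals, restated here as assumptions:

```python
# Equivalent predicate: rotate to a new output file once the current one
# passes the initial-size threshold AND the next entry would overflow
# max_file_size (pos > max_file_size - n  <=>  pos + n > max_file_size).
def needsNewFile(pos: int, text: str) -> bool:
    return (
        pos > initFileSizeMax
        and pos + len(text.encode(encoding)) > max_file_size
    )
```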
4 changes: 2 additions & 2 deletions pyglossary/plugins/info_plugin.py
@@ -38,7 +38,7 @@ def __init__(self, glos: GlossaryType) -> None:

def open(self, filename: str) -> None:
self._filename = filename
self._file = open(filename, mode="wt", encoding="utf-8")
self._file = open(filename, mode="w", encoding="utf-8")

def finish(self) -> None:
self._filename = ""
@@ -172,7 +172,7 @@ def close(self) -> None:
def open(self, filename: str) -> None:
from pyglossary.json_utils import jsonToOrderedData

with open(filename, "r", encoding="utf-8") as infoFp:
with open(filename, encoding="utf-8") as infoFp:
info = jsonToOrderedData(infoFp.read())
for key, value in info.items():
self._glos.setInfo(key, value)
2 changes: 1 addition & 1 deletion pyglossary/plugins/sql.py
@@ -54,7 +54,7 @@ def finish(self) -> None:

def open(self, filename: str) -> None:
self._filename = filename
self._file = open(filename, "wt", encoding=self._encoding)
self._file = open(filename, "w", encoding=self._encoding)
self._writeInfo()

def _writeInfo(self) -> None:
3 changes: 2 additions & 1 deletion pyglossary/plugins/wordnet.py
@@ -261,7 +261,8 @@ def a(word):
symbol_desc = getattr(PointerSymbols, synset.ss_type)[symbol]
except KeyError:
print(
f"WARNING: unknown pointer symbol {symbol} for {synset.ss_type} ",
f"WARNING: unknown pointer symbol {symbol}"
f" for {synset.ss_type} ",
)
symbol_desc = symbol

30 changes: 16 additions & 14 deletions pyglossary/slob.py
@@ -1138,7 +1138,8 @@ def __init__(

self.current_bin: "BinMemWriter | None" = None

created_at = os.getenv("SLOB_TIMESTAMP") or datetime.now(timezone.utc).isoformat()
created_at = os.getenv("SLOB_TIMESTAMP") or \
datetime.now(timezone.utc).isoformat()

self.blob_count = 0
self.ref_count = 0
@@ -1310,6 +1311,14 @@ def _sort(self) -> None:
def _resolve_aliases(self) -> None:
self._fire_event('begin_resolve_aliases')
self.f_aliases.finalize()

def read_key_frag(item: "Blob", default_fragment: str) -> "tuple[str, str]":
key_frag = pickle.loads(item.content)
if isinstance(key_frag, str):
return key_frag, default_fragment
to_key, fragment = key_frag
return to_key, fragment

with MultiFileReader(
self.f_ref_positions.name,
self.f_refs.name,
@@ -1327,13 +1336,6 @@ def _resolve_aliases(self) -> None:
version_info=False,
)

def read_key_frag(item: "Blob", default_fragment: str) -> "tuple[str, str]":
key_frag = pickle.loads(item.content)
if isinstance(key_frag, str):
return key_frag, default_fragment
to_key, fragment = key_frag
return to_key, fragment

for item in aliasesSlob:
from_key = item.key
keys = set()
@@ -1408,19 +1410,19 @@ def finalize(self) -> None:

buf_size = 10 * 1024 * 1024

def write_tags(tags: "MappingProxyType[str, Any]", f: "StructWriter") -> None:
f.write(pack(U_CHAR, len(tags)))
for key, value in tags.items():
f.write_tiny_text(key)
f.write_tiny_text(value, editable=True)

with fopen(self.filename, mode='wb') as output_file:
out = StructWriter(output_file, self.encoding)
out.write(MAGIC)
out.write(uuid4().bytes)
out.write_tiny_text(self.encoding, encoding=UTF8)
out.write_tiny_text(self.compression)

def write_tags(tags: "MappingProxyType[str, Any]", f: "StructWriter") -> None:
f.write(pack(U_CHAR, len(tags)))
for key, value in tags.items():
f.write_tiny_text(key)
f.write_tiny_text(value, editable=True)

write_tags(self.tags, out)

def write_content_types(
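Reviewer note: the hoisted read_key_frag() accepts two pickled payload shapes; a small, hedged demonstration with made-up values:

```python
import pickle

# An alias blob's content is either a pickled str (key only) or a pickled
# (key, fragment) tuple; read_key_frag() normalizes both to (key, fragment).
for content in (pickle.dumps("colour"), pickle.dumps(("colour", "british"))):
    key_frag = pickle.loads(content)
    if isinstance(key_frag, str):
        print((key_frag, ""))  # default fragment
    else:
        print(tuple(key_frag))
```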
3 changes: 2 additions & 1 deletion pyglossary/ui/gtk3_utils/about.py
@@ -85,7 +85,8 @@ def newTabWidgetTextView(
tv.set_cursor_visible(False)
tv.set_border_width(10)
buf = tv.get_buffer()
# buf.insert_markup(buf.get_end_iter(), markup=text, len=len(text.encode("utf-8")))
# buf.insert_markup(buf.get_end_iter(), markup=text,
# len=len(text.encode("utf-8")))
buf.set_text(text)
tv.show_all()
swin = gtk.ScrolledWindow()