Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implements basic BOM/Codec detection, PEP compliance and style guide pass, Python3 modernisation #2

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 108 additions & 46 deletions valve_keyvalues_python/keyvalues.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,48 @@
__author__ = "Jiri Novotny"
__version__ = "1.0.0"


# Source: https://github.com/gorgitko/valve-keyvalues-python


class KeyValues(dict):
"""
Class for manipulation with Valve KeyValue (KV) files (VDF format). Parses the KV file to object with dict interface.
Class for manipulation with Valve KeyValue (KV) files (VDF format). Parses the KV file to object with dict
interface.

Allows to write objects with dict interface to KV files.
"""

__re = __import__('re')
__sys = __import__('sys')
__codecs = __import__('codecs')
__OrderedDict = __import__('collections').OrderedDict
__regexs = {
"key": __re.compile(r"""(['"])(?P<key>((?!\1).)*)\1(?!.)""", __re.I),
"key_value": __re.compile(r"""(['"])(?P<key>((?!\1).)*)\1(\s+|)['"](?P<value>((?!\1).)*)\1""", __re.I)
}

def __init__(self, mapper=None, filename=None, encoding="utf-8", mapper_type=__OrderedDict, key_modifier=None, key_sorter=None):

def __init__(self, mapper=None, filename=None, encoding="utf-8", mapper_type=__OrderedDict, key_modifier=None,
key_sorter=None, ignore_bom=False):
"""
:param mapper: initialize with own dict-like mapper
:param filename: filename of KV file, which will be parsed to dict structure. Mapper param must not be specified when using this param!
:param filename: filename of KV file, which will be parsed to dict structure. Mapper param must not be specified
when using this param!
:param encoding: KV file encoding. Default: 'utf-8'
:param mapper_type: which mapper will be used for storing KV. It must have the dict interface, i.e. allow to do the 'mapper[key] = value action'.
default: 'collections.OrderedDict'
For example you can use the 'dict' type.
:param key_modifier: function for modifying the keys, e.g. the function 'string.lower' will make all the keys lower
:param key_sorter: function for sorting the keys when dumping/writing/str, e.g. using the function 'sorted' will show KV keys in alphabetical order
:param mapper_type: which mapper will be used for storing KV. It must have the dict interface, i.e. allow to do
the 'mapper[key] = value action'.
default: 'collections.OrderedDict'
For example you can use the 'dict' type.
:param key_modifier: function for modifying the keys, e.g. the function 'string.lower' will make all the keys
lower
:param key_sorter: function for sorting the keys when dumping/writing/str, e.g. using the function 'sorted'
will show KV keys in alphabetical order
:param ignore_bom: Manual override flag to ignore the BOM marker at the start of the file and use the specified
encoding. Normal use should never see the user modifying this flag. Does nothing if filename
is not specified.
"""

super().__init__()
self.__sys.setrecursionlimit(100000)
self.mapper_type = type(mapper) if mapper else mapper_type
self.key_modifier = key_modifier
Expand All @@ -40,8 +56,8 @@ def __init__(self, mapper=None, filename=None, encoding="utf-8", mapper_type=__O
self.__mapper = mapper
return

if type(filename) == str:
self.parse(filename, encoding=encoding)
if isinstance(filename, str):
self.parse(filename, encoding=encoding, ignore_bom=ignore_bom)
else:
raise Exception("'filename' argument must be string!")

Expand All @@ -52,7 +68,7 @@ def __getitem__(self, key):
return self.__mapper[key]

def __repr__(self):
#return repr(self.__mapper)
# return repr(self.__mapper)
return self.dump(self.__mapper)

def __len__(self):
Expand All @@ -73,8 +89,9 @@ def copy(self):
def has_key(self, k):
    """
    Python 2 style membership test, kept so existing callers keep working.

    :param k: key to look up in the wrapped mapper
    :return: True if 'k' is a key of the wrapped mapper, False otherwise
    """
    # dict.has_key() was removed in Python 3, so delegating to it raises AttributeError for dict/OrderedDict
    # mappers. The 'in' operator is the equivalent test and works for any mapper with the dict interface.
    return k in self.__mapper

def pop(self, k, d=None):
return self.__mapper.pop(k, d)
# This function gets redef'd below and never used
# def pop(self, k, d=None):
# return self.__mapper.pop(k, d)

def update(self, *args, **kwargs):
    """
    Merges the given keys/values into the wrapped mapper.

    All positional and keyword arguments are handed over unchanged to the mapper's own 'update' method.

    :return: whatever the mapper's 'update' method returns
    """
    target = self.__mapper
    return target.update(*args, **kwargs)
Expand All @@ -91,8 +108,9 @@ def items(self):
def pop(self, *args):
    """
    Removes a key from the wrapped mapper and returns its value.

    The arguments (key and optional default) are forwarded unchanged to the mapper's own 'pop' method.

    :return: whatever the mapper's 'pop' method returns
    """
    remove = self.__mapper.pop
    return remove(*args)

def __cmp__(self, dict):
return cmp(self.__mapper, dict)
def __cmp__(self, _dict):
# cmp() does not exist in Py3, but we can just refer to the py3 dict __cmp__ implementation
return super().__cmp__(self.__mapper, _dict)

def __contains__(self, item):
return item in self.__mapper
Expand All @@ -101,7 +119,8 @@ def __iter__(self):
return iter(self.__mapper)

def __unicode__(self):
return unicode(repr(self.__mapper))
# str is the idiomatic replacement for Py2's 'unicode()' function
return str(repr(self.__mapper))

def __str__(self):
return self.dump()
Expand Down Expand Up @@ -141,20 +160,21 @@ def __parse(self, lines, mapper_type, i=0, key_modifier=None):
if lines[i].startswith("{"):
if not key:
raise Exception("'{{' found without key at line {}".format(i + 1))
_mapper[key], i = self.__parse(lines, i=i+1, mapper_type=mapper_type, key_modifier=key_modifier)
_mapper[key], i = self.__parse(lines, i=i + 1, mapper_type=mapper_type, key_modifier=key_modifier)
continue
elif lines[i].startswith("}"):
return _mapper, i + 1
elif self.__re.match(self.__regexs["key"], lines[i]):
key = self.__key_modifier(self.__re.search(self.__regexs["key"], lines[i]).group("key"), key_modifier)
key = self.__key_modifier(self.__re.search(self.__regexs["key"], lines[i]).group("key"),
key_modifier)
i += 1
continue
elif self.__re.match(self.__regexs["key_value"], lines[i]):
groups = self.__re.search(self.__regexs["key_value"], lines[i])
_mapper[self.__key_modifier(groups.group("key"), key_modifier)] = groups.group("value")
i += 1
elif self.__re.match(self.__regexs["key_value"], lines[i] + lines[i+1]):
groups = self.__re.search(self.__regexs["key_value"], lines[i] + " " + lines[i+1])
elif self.__re.match(self.__regexs["key_value"], lines[i] + lines[i + 1]):
groups = self.__re.search(self.__regexs["key_value"], lines[i] + " " + lines[i + 1])
_mapper[self.__key_modifier(groups.group("key"), key_modifier)] = groups.group("value")
i += 1
else:
Expand All @@ -164,26 +184,66 @@ def __parse(self, lines, mapper_type, i=0, key_modifier=None):

return _mapper

def parse(self, filename, encoding="utf-8", mapper_type=__OrderedDict, key_modifier=None):
@staticmethod
def _codec_detection(path, default) -> str:
    """
    Sniffs the first bytes of a file for a [Byte Order Mark (BOM)](https://en.wikipedia.org/wiki/Byte_order_mark)
    and returns the codec name associated with it, or 'default' when no known BOM is found.

    SOURCE: https://stackoverflow.com/a/24370596
    LICENSE: CC BY-SA 4.0
    AUTHOR: ivan_pozdeev

    :param path: The KV File path
    :param default: The encoding to fall back to when the file does not start with a known BOM
    :return: 'utf-8-sig', 'utf-32' or 'utf-16' when the matching BOM is present, otherwise 'default'
    """
    with open(path, 'rb') as handle:
        head = handle.read(4)  # at most 4 bytes; fewer if the file is shorter

    # Order matters: the UTF-32 LE BOM begins with the same bytes as the UTF-16 LE BOM, so the UTF-32 markers
    # must be tested before the UTF-16 ones.
    bom_table = (
        (KeyValues.__codecs.BOM_UTF8, 'utf-8-sig'),
        (KeyValues.__codecs.BOM_UTF32_LE, 'utf-32'),
        (KeyValues.__codecs.BOM_UTF32_BE, 'utf-32'),
        (KeyValues.__codecs.BOM_UTF16_LE, 'utf-16'),
        (KeyValues.__codecs.BOM_UTF16_BE, 'utf-16'),
    )
    for marker, codec_name in bom_table:
        if head.startswith(marker):
            return codec_name
    return default

def parse(self, filename, encoding="utf-8", mapper_type=__OrderedDict, key_modifier=None, ignore_bom=False):
    """
    Parses the KV file so this instance can be accessed by dict interface.

    :param filename: name of the KV file
    :param encoding: The encoding of the KV file. Default: 'utf-8'. Some KV files are UTF16-LE encoded.
    :param mapper_type: which mapper will be used for storing KV. It must have the dict interface,
                        i.e. allow to do the 'mapper[key] = value action'.
                        default: 'collections.OrderedDict'
                        For example you can use the 'dict' type. This will override the instance's 'mapper_type'
                        if specified during instantiation.
    :param key_modifier: function for modifying the keys, e.g. the function 'string.lower' will make all the keys
                         lower. This will override the instance's 'key_modifier' if specified during instantiation.
    :param ignore_bom: Manual override flag to ignore the BOM marker at the start of the file and use the specified
                       encoding. Normal use should never see the user modifying this flag.
    """

    _encoding = encoding
    # Only sniff the BOM when its result can actually be applied. With ignore_bom=True the caller's encoding is
    # used as-is, so printing "Automatically using ..." in that case would describe something that does not happen.
    if not ignore_bom:
        _determined_codec = KeyValues._codec_detection(filename, encoding)
        if _determined_codec != encoding:
            print(f"Warning - The file codec was detected to be {_determined_codec.upper()} but {encoding} was "
                  f"provided. Automatically using {_determined_codec.upper()} (pass ignore_bom=True to bypass this).")
            _encoding = _determined_codec

    with open(filename, mode="r", encoding=_encoding) as f:
        # Surrounding whitespace is stripped so the parser can match on the first character of each line.
        lines = [line.strip() for line in f]

    self.__mapper = self.__parse(lines,
                                 mapper_type=mapper_type or self.mapper_type,
                                 key_modifier=key_modifier or self.key_modifier)

@staticmethod
def __tab(string, level, quotes=False):
if quotes:
return '{}"{}"'.format(level * "\t", string)
else:
Expand All @@ -198,13 +258,13 @@ def __dump(self, mapper, key_sorter=None, level=0):
keys = mapper.keys()

for key in keys:
string += self.__tab(key, level, quotes=True)
if type(mapper[key]) == str:
string += KeyValues.__tab(key, level, quotes=True)
if isinstance(mapper[key], str):
string += '\t "{}"\n'.format(mapper[key])
else:
string += "\n" + self.__tab("{\n", level)
string += self.__dump(mapper[key], key_sorter=key_sorter, level=level+1)
string += self.__tab("}\n", level)
string += "\n" + KeyValues.__tab("{\n", level)
string += self.__dump(mapper[key], key_sorter=key_sorter, level=level + 1)
string += KeyValues.__tab("}\n", level)

return string

Expand All @@ -213,8 +273,9 @@ def dump(self, mapper=None, key_sorter=None):
Dumps the KeyValues mapper to string.

:param mapper: you can dump your own object with dict interface
:param key_sorter: function for sorting the keys when dumping/writing/str, e.g. using the function 'sorted' will show KV in alphabetical order.
This will override the instance's 'key_sorter' if specified during instantiation.
:param key_sorter: function for sorting the keys when dumping/writing/str, e.g. using the function 'sorted' will
show KV in alphabetical order. This will override the instance's 'key_sorter' if specified
during instantiation.
:return: string
"""

Expand All @@ -227,8 +288,9 @@ def write(self, filename, encoding="utf-8", mapper=None, key_sorter=None):
:param filename: output KV file name
:param encoding: output KV file encoding. Default: 'utf-8'
:param mapper: you can write your own object with dict interface
:param key_sorter: key_sorter: function for sorting the keys when dumping/writing/str, e.g. using the function 'sorted' will show KV in alphabetical order.
This will override the instance's 'key_sorter' if specified during instantiation.
:param key_sorter: key_sorter: function for sorting the keys when dumping/writing/str, e.g. using the function
'sorted' will show KV in alphabetical order.
This will override the instance's 'key_sorter' if specified during instantiation.
"""

with open(filename, mode="w", encoding=encoding) as f:
Expand Down