Skip to content

Commit

Permalink
cleanup defined index
Browse files Browse the repository at this point in the history
Signed-off-by: Praneeth Bedapudi <praneeth@bpraneeth.com>
  • Loading branch information
bedapudi6788 committed Dec 11, 2023
1 parent 5b9d24d commit 414e045
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 37 deletions.
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,13 @@ pip install --upgrade liteindex
- works across threads, processes seamlessly
- compression is supported natively and optional custom compression dictionaries can be built

### KVStore
- [Documentation]() | [Detailed example]() | [Benchmarks]()
- very fast reads, with full multi-thread and multi-process support; based on LMDB
- can store any Python object; querying is not supported
- Eviction policies supported: `LRU`, `LFU`, `any`, and age-based invalidation

### function_cache
- [Documentation](https://github.com/notAI-tech/LiteIndex/blob/main/function_cache.md) | [Detailed example](https://github.com/notAI-tech/LiteIndex/blob/main/examples/function_cache_example.py) | [Benchmarks](https://github.com/notAI-tech/LiteIndex/tree/main/benchmarks/function_cache)
- based on DefinedIndex, easy to use decorator for caching heavy function calls with arguments
- works across threads, processes seamlessly
- ultra fast; works seamlessly across threads and processes
- Eviction policies supported: `LRU`, `LFU`, `any`, and age-based invalidation
118 changes: 84 additions & 34 deletions liteindex/kv_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,39 @@
import hashlib
import tempfile


class KVIndex:
EvictNone = "none"
EvictAny = "any"

def __init__(self, dir=tempfile.mkdtemp(), fast_mode=False, store_key=True, eviction_policy=EvictNone, max_size_mb=0, max_no_of_elements=0):
def __init__(
self,
dir=tempfile.mkdtemp(),
fast_mode=False,
store_key=True,
eviction_policy=EvictNone,
max_size_mb=0,
max_no_of_elements=0,
):
self.fast_mode = fast_mode
self.store_key = store_key

self.eviction_policy = eviction_policy

if self.eviction_policy == KVIndex.EvictNone:
self.max_no_of_elements = 0
self.max_size_mb = 0
elif self.eviction_policy == KVIndex.EvictAny:
if max_size_mb == 0 and max_no_of_elements == 0:
raise ValueError("At least one of max_size_mb or max_no_of_elements must be set when eviction_policy is EvictAny")
raise ValueError(
"At least one of max_size_mb or max_no_of_elements must be set when eviction_policy is EvictAny"
)
else:
raise ValueError(f"Unknown eviction_policy: {eviction_policy}")

self.max_size_mb = max_size_mb
self.max_no_of_elements = max_no_of_elements

self.__env = lmdb.open(
path=dir,
subdir=True,
Expand All @@ -37,20 +48,24 @@ def __init__(self, dir=tempfile.mkdtemp(), fast_mode=False, store_key=True, evic
writemap=fast_mode,
max_readers=2048,
meminit=False,
max_dbs=4
max_dbs=4,
)

self.__key_hash_to_value_db = self.__env.open_db(b"key_hash_to_value", create=True)
self.__key_hash_to_value_db = self.__env.open_db(
b"key_hash_to_value", create=True
)

if self.store_key:
self.__key_hash_to_key_db = self.__env.open_db(b"key_hash_to_key", create=True)

def __evict(self, txn, min_to_delete=1):
self.__key_hash_to_key_db = self.__env.open_db(
b"key_hash_to_key", create=True
)

def __evict(self, txn, min_to_delete=1):
if self.max_size_mb:
stat = txn.stat(db=self.__key_hash_to_value_db)
current_size_mb = stat['psize'] * stat['leaf_pages'] / 1024**2
current_record_count = stat['entries']

current_size_mb = stat["psize"] * stat["leaf_pages"] / 1024**2
current_record_count = stat["entries"]
if current_size_mb <= self.max_size_mb * 0.8:
return

Expand All @@ -65,16 +80,16 @@ def __evict(self, txn, min_to_delete=1):
cursor.delete()
else:
break

stat = txn.stat(db=self.__key_hash_to_value_db)
current_size_mb = stat['psize'] * stat['leaf_pages'] / 1024**2
current_record_count = stat['entries']
current_size_mb = stat["psize"] * stat["leaf_pages"] / 1024**2
current_record_count = stat["entries"]
if current_size_mb <= self.max_size_mb * 0.8:
cursor.close()
return
elif self.max_no_of_elements:
stat = txn.stat(db=self.__key_hash_to_value_db)
current_record_count = stat['entries']
current_record_count = stat["entries"]
if current_record_count <= self.max_no_of_elements:
return

Expand All @@ -89,25 +104,37 @@ def __evict(self, txn, min_to_delete=1):
cursor.delete()
else:
break


def __setitem__(self, key, value):
    """Store ``value`` under ``key``, running eviction first.

    Both key and value are serialized with pickle. The value is stored
    under the SHA-256 digest of the pickled key; when ``self.store_key``
    is set, the pickled key is also recorded under the same digest (an
    existing key record is left untouched).

    Args:
        key: Any picklable object.
        value: Any picklable object.

    Raises:
        pickle.PicklingError (or TypeError/AttributeError from pickle):
            if ``key`` or ``value`` cannot be pickled.
    """
    # The previous code pickled only ``str`` inputs and called ``.encode()``
    # on everything else, which raised AttributeError for ints, bytes,
    # dicts, etc. Pickling unconditionally yields the same bytes as before
    # for ``str`` (so existing entries stay addressable) and makes every
    # other picklable type storable.
    # NOTE(review): pickled bytes of unordered containers (set, dict) are
    # not guaranteed to be identical across processes, so such objects make
    # unreliable keys.
    key_bytes = pickle.dumps(key, protocol=pickle.HIGHEST_PROTOCOL)
    value_bytes = pickle.dumps(value, protocol=pickle.HIGHEST_PROTOCOL)
    key_hash = hashlib.sha256(key_bytes).digest()

    # Serialization happens before the write transaction is opened so the
    # transaction is held only for the eviction and the actual puts.
    with self.__env.begin(write=True) as txn:
        self.__evict(txn)

        txn.put(
            key_hash, value_bytes, db=self.__key_hash_to_value_db, overwrite=True
        )

        if self.store_key:
            # overwrite=False: the key for a given digest never changes.
            txn.put(key_hash, key_bytes, db=self.__key_hash_to_key_db, overwrite=False)



def __getitem__(self, key):
with self.__env.begin(write=False, buffers=True) as txn:
key = pickle.dumps(key, protocol=pickle.HIGHEST_PROTOCOL) if isinstance(key, str) else key.encode()
key = (
pickle.dumps(key, protocol=pickle.HIGHEST_PROTOCOL)
if isinstance(key, str)
else key.encode()
)
key_hash = hashlib.sha256(key).digest()

result = txn.get(key_hash, db=self.__key_hash_to_value_db)
Expand All @@ -116,32 +143,55 @@ def __getitem__(self, key):
raise KeyError(key)

return pickle.loads(result) if result[0] == 128 else result.decode()

def update(self, items):
    """Store many key/value pairs in a single write transaction.

    Args:
        items: Iterable of ``(key, value)`` pairs. Any iterable is
            accepted, including generators; keys and values must be
            picklable.

    Eviction is run once before writing, with ``min_to_delete`` set to
    the number of pairs supplied.
    """
    # Serialize everything up front. This (a) lets ``items`` be a one-shot
    # iterable, since the count is taken from the materialized list rather
    # than ``len(items)``, and (b) keeps the write transaction short.
    #
    # The previous code pickled only ``str`` inputs and called ``.encode()``
    # on everything else, which raised AttributeError for non-str objects.
    # Pickling unconditionally produces identical bytes for ``str`` and
    # makes every other picklable type storable.
    records = []
    for key, value in items:
        key_bytes = pickle.dumps(key, protocol=pickle.HIGHEST_PROTOCOL)
        value_bytes = pickle.dumps(value, protocol=pickle.HIGHEST_PROTOCOL)
        records.append((hashlib.sha256(key_bytes).digest(), key_bytes, value_bytes))

    with self.__env.begin(write=True) as txn:
        self.__evict(txn, min_to_delete=len(records))

        for key_hash, key_bytes, value_bytes in records:
            txn.put(
                key_hash,
                value_bytes,
                db=self.__key_hash_to_value_db,
                overwrite=True,
            )

            if self.store_key:
                # overwrite=False: the key for a given digest never changes.
                txn.put(
                    key_hash, key_bytes, db=self.__key_hash_to_key_db, overwrite=False
                )

def getmulti(self, keys):
    """Look up several keys in one read transaction.

    Args:
        keys: Iterable of keys to look up.

    Returns:
        A list with one entry per key, in input order. A key that is not
        present yields ``None`` (indistinguishable from a stored ``None``).
    """
    # Keys are pickled unconditionally. The previous ternary pickled ``str``
    # keys and called ``.encode()`` on anything else, which raised
    # AttributeError; pickling always gives the same bytes for ``str``.
    key_hashes = [
        hashlib.sha256(
            pickle.dumps(key, protocol=pickle.HIGHEST_PROTOCOL)
        ).digest()
        for key in keys
    ]

    results = []
    with self.__env.begin(write=False, buffers=True) as txn:
        for key_hash in key_hashes:
            raw = txn.get(key_hash, db=self.__key_hash_to_value_db)

            # The None check must come first: the previous chained
            # conditional evaluated ``result[0]`` before testing for None
            # and raised TypeError on every missing key.
            if raw is None:
                results.append(None)
            elif len(raw) > 0 and raw[0] == 128:
                # 0x80 is the leading opcode of pickle protocol >= 2.
                # NOTE(security): pickle.loads executes arbitrary code if
                # the database contents are attacker-controlled.
                results.append(pickle.loads(raw))
            else:
                # With buffers=True the value is a buffer object, which has
                # no .decode(); copy to bytes before decoding. Decoding is
                # done inside the transaction while the buffer is valid.
                results.append(bytes(raw).decode())

    return results
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
EMAIL = "praneeth@bpraneeth.com"
AUTHOR = "BEDAPUDI PRANEETH"
REQUIRES_PYTHON = ">=3.6.0"
VERSION = "0.0.2.dev22"
VERSION = "0.0.2.dev23"

# What packages are required for this module to be executed?
REQUIRED = [
Expand Down

0 comments on commit 414e045

Please sign in to comment.