From 35f60b7c3f2a6cdd4ebc7fa39d81b406018aa54e Mon Sep 17 00:00:00 2001 From: sydp Date: Mon, 4 Nov 2024 14:21:22 +1100 Subject: [PATCH] Updates to Chromium IndexedDB parsing (#60) --- dfindexeddb/indexeddb/chromium/definitions.py | 5 ++ dfindexeddb/indexeddb/chromium/record.py | 48 ++++++++++++------- .../dfindexeddb/indexeddb/chromium/record.py | 8 ++-- 3 files changed, 41 insertions(+), 20 deletions(-) diff --git a/dfindexeddb/indexeddb/chromium/definitions.py b/dfindexeddb/indexeddb/chromium/definitions.py index 3df700b..d5fd742 100644 --- a/dfindexeddb/indexeddb/chromium/definitions.py +++ b/dfindexeddb/indexeddb/chromium/definitions.py @@ -16,6 +16,11 @@ from enum import Enum, IntEnum, IntFlag +REQUIRES_PROCESSING_SSV_PSEUDO_VERSION = 0x11 +REPLACE_WITH_BLOB = 0x01 +COMPRESSED_WITH_SNAPPY = 0x02 + + class DatabaseMetaDataKeyType(IntEnum): """Database Metadata key types.""" ORIGIN_NAME = 0 diff --git a/dfindexeddb/indexeddb/chromium/record.py b/dfindexeddb/indexeddb/chromium/record.py index e24ca3b..4eb1783 100644 --- a/dfindexeddb/indexeddb/chromium/record.py +++ b/dfindexeddb/indexeddb/chromium/record.py @@ -14,6 +14,7 @@ # limitations under the License. """Parses Chromium IndexedDb structures.""" from __future__ import annotations + from dataclasses import dataclass, field from datetime import datetime import io @@ -23,6 +24,8 @@ from typing import Any, BinaryIO, Generator, Optional, Tuple, Type, TypeVar, \ Union +import snappy + from dfindexeddb import errors from dfindexeddb.indexeddb.chromium import blink from dfindexeddb.indexeddb.chromium import definitions @@ -433,7 +436,7 @@ class SchemaVersionKey(BaseIndexedDBKey): def DecodeValue(self, decoder: utils.LevelDBDecoder) -> int: """Decodes the schema version value.""" - return decoder.DecodeVarint()[1] + return decoder.DecodeInt()[1] @classmethod def FromDecoder( @@ -453,7 +456,7 @@ class MaxDatabaseIdKey(BaseIndexedDBKey): def DecodeValue(self, decoder: utils.LevelDBDecoder) -> int: """Decodes the maximum database value.""" - return decoder.DecodeVarint()[1] + return decoder.DecodeInt()[1] @classmethod def FromDecoder( @@ -473,7 +476,7 @@ class DataVersionKey(BaseIndexedDBKey): def DecodeValue(self, decoder: utils.LevelDBDecoder) -> int: """Decodes the data version value.""" - return decoder.DecodeUint64Varint()[1] + return decoder.DecodeInt()[1] @classmethod def FromDecoder( @@ -635,7 +638,7 @@ def DecodeValue(self, decoder: utils.LevelDBDecoder) -> int: The value is the corresponding database ID. """ - return decoder.DecodeVarint()[1] + return decoder.DecodeInt()[1] @classmethod def FromDecoder( @@ -782,7 +785,7 @@ class ObjectStoreNamesKey(BaseIndexedDBKey): def DecodeValue(self, decoder: utils.LevelDBDecoder) -> int: """Decodes the object store names value.""" - return decoder.DecodeVarint()[1] + return decoder.DecodeInt()[1] @classmethod def FromDecoder(cls, decoder: utils.LevelDBDecoder, key_prefix: KeyPrefix, @@ -807,7 +810,7 @@ class IndexNamesKey(BaseIndexedDBKey): def DecodeValue(self, decoder: utils.LevelDBDecoder) -> int: """Decodes the index names value.""" - return decoder.DecodeVarint()[1] + return decoder.DecodeInt()[1] @classmethod def FromDecoder(cls, decoder: utils.LevelDBDecoder, key_prefix: KeyPrefix, @@ -844,7 +847,7 @@ def DecodeValue( return decoder.DecodeString()[1] if (self.metadata_type == definitions.DatabaseMetaDataKeyType.MAX_ALLOCATED_OBJECT_STORE_ID): - return decoder.DecodeVarint()[1] + return decoder.DecodeInt()[1] if (self.metadata_type == definitions.DatabaseMetaDataKeyType.IDB_INTEGER_VERSION): return decoder.DecodeVarint()[1] @@ -969,13 +972,13 @@ class ObjectStoreDataValue: """The parsed values from an ObjectStoreDataKey. Attributes: - unknown: an unknown integer (possibly a sequence number?). + version: the version prefix. is_wrapped: True if the value was wrapped. blob_size: the blob size, only valid if wrapped. blob_offset: the blob offset, only valid if wrapped. value: the blink serialized value, only valid if not wrapped. """ - unknown: int + version: int is_wrapped: bool blob_size: Optional[int] blob_offset: Optional[int] @@ -994,28 +997,41 @@ class ObjectStoreDataKey(BaseIndexedDBKey): def DecodeValue( self, decoder: utils.LevelDBDecoder) -> ObjectStoreDataValue: """Decodes the object store data value.""" - _, unknown_integer = decoder.DecodeVarint() + _, version = decoder.DecodeVarint() _, wrapped_header_bytes = decoder.PeekBytes(3) if len(wrapped_header_bytes) != 3: raise errors.DecoderError('Insufficient bytes') - if (wrapped_header_bytes[0] == definitions.BlinkSerializationTag.VERSION and - wrapped_header_bytes[1] == 0x11 and - wrapped_header_bytes[2] == 0x01): + if (wrapped_header_bytes[0] == + definitions.BlinkSerializationTag.VERSION and + wrapped_header_bytes[1] == + definitions.REQUIRES_PROCESSING_SSV_PSEUDO_VERSION and + wrapped_header_bytes[2] == definitions.REPLACE_WITH_BLOB): + _ = decoder.ReadBytes(3) _, blob_size = decoder.DecodeVarint() _, blob_offset = decoder.DecodeVarint() return ObjectStoreDataValue( - unknown=unknown_integer, + version=version, is_wrapped=True, blob_size=blob_size, blob_offset=blob_offset, value=None) _, blink_bytes = decoder.ReadBytes() + is_wrapped = False + if ( + wrapped_header_bytes[0] == + definitions.BlinkSerializationTag.VERSION and + wrapped_header_bytes[1] == + definitions.REQUIRES_PROCESSING_SSV_PSEUDO_VERSION and + wrapped_header_bytes[2] == definitions.COMPRESSED_WITH_SNAPPY): + is_wrapped = True + # ignore the wrapped header bytes when decompressing + blink_bytes = snappy.decompress(blink_bytes[3:]) blink_value = blink.V8ScriptValueDecoder.FromBytes(blink_bytes) return ObjectStoreDataValue( - unknown=unknown_integer, - is_wrapped=False, + version=version, + is_wrapped=is_wrapped, blob_size=None, blob_offset=None, value=blink_value) diff --git a/tests/dfindexeddb/indexeddb/chromium/record.py b/tests/dfindexeddb/indexeddb/chromium/record.py index f3b4a2f..8a29ceb 100644 --- a/tests/dfindexeddb/indexeddb/chromium/record.py +++ b/tests/dfindexeddb/indexeddb/chromium/record.py @@ -95,7 +95,7 @@ def test_data_version_key(self): expected_key = record.DataVersionKey( offset=4, key_prefix=record.KeyPrefix( offset=0, database_id=0, object_store_id=0, index_id=0)) - expected_value = 20 + expected_value = 64424509460 record_bytes = ((b'\x00\x00\x00\x00\x02'), (b'\x14\x00\x00\x00\x0f')) parsed_key = record.DataVersionKey.FromBytes(record_bytes[0]) @@ -579,10 +579,10 @@ def test_object_store_data_key(self): encoded_user_key=record.IDBKey( offset=4, type=definitions.IDBKeyType.NUMBER, value=3.0)) expected_value = record.ObjectStoreDataValue( - unknown=4, + version=4, is_wrapped=True, - blob_offset=1, - blob_size=2303, + blob_offset=0, + blob_size=102480, value=None) record_bytes = ( b'\x00\x01\x01\x01\x03\x00\x00\x00\x00\x00\x00\x08@',