Skip to content

Commit

Permalink
fix: [crashlogs] improve parsing + slightly better tests fixes #113
Browse files Browse the repository at this point in the history
  • Loading branch information
cvandeplas committed Nov 6, 2024
1 parent 568f4fb commit ac1bb41
Show file tree
Hide file tree
Showing 2 changed files with 179 additions and 58 deletions.
174 changes: 116 additions & 58 deletions src/sysdiagnose/parsers/crashlogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,73 +59,127 @@ def execute(self) -> list | dict:
continue
seen.add(ips_hash)
result.append(ips)
except Exception as e:
except Exception:
logger.warning(f"Skipping file due to error {file}", exc_info=True)
return result

def parse_ips_file(path: str) -> list | dict:
    '''
    Parse one .ips crash report file into a dictionary.

    The first line of the file is decoded as a JSON header. All remaining
    lines form the report body and are handed to
    CrashLogsParser.process_ips_lines, whose result is stored under the
    'report' key. The body is parsed in exactly one place: the former inline
    text parser has been dropped because its output was overwritten by that
    call and it raised on bodies the shared parser accepts.

    Args:
        path: location of the .ips file to read.

    Returns:
        The header dictionary extended with:
        - 'report': parsed report body
        - 'timestamp_orig': the header's original 'timestamp' string
        - 'datetime': ISO 8601 rendering with microsecond precision
        - 'timestamp': POSIX timestamp (float), replacing the original string

    Raises:
        json.JSONDecodeError: the first line is not valid JSON.
        KeyError: the header has no 'timestamp' entry.
        ValueError: the timestamp does not match '%Y-%m-%d %H:%M:%S.%f %z'.
    '''
    with open(path, 'r') as f:
        result = json.loads(f.readline())  # first line is the JSON header
        lines = f.readlines()              # everything else is the report body

    result['report'] = CrashLogsParser.process_ips_lines(lines)

    # keep the original string, then expose both an ISO string and an epoch value
    timestamp = datetime.strptime(result['timestamp'], '%Y-%m-%d %H:%M:%S.%f %z')
    result['timestamp_orig'] = result['timestamp']
    result['datetime'] = timestamp.isoformat(timespec='microseconds')
    result['timestamp'] = timestamp.timestamp()
    return result

def process_ips_lines(lines: list) -> dict:
    '''
    Parse the body of an .ips crash report (the lines after the JSON header).

    There are 2 main models of crashlogs:
    - one big entry nicely structured in json.
    - pseudo-structured text, possibly with multiple Powerstats entries.

    The pseudo-structured text is read as:
        key: value
    or
        key:
            multiple lines
    where an empty line ends a multi-line section. Once a key containing
    'Powerstats' has been seen, every following entry is stored under
    result['Powerstats'][<first word of that key's value>], and lines without
    a colon are collected verbatim in that entry's 'extra_data' list.

    Args:
        lines: report body, one string per line (trailing newlines allowed).

    Returns:
        The decoded JSON object when the body is JSON, otherwise a dict of the
        parsed sections. An empty body yields an empty dict.

    Raises:
        json.JSONDecodeError: the body looks like JSON but does not decode.
        Exception: a line without a colon (other than 'EOF') is met outside a
            Powerstats entry.
    '''
    if not lines:
        # nothing after the header line; avoid indexing into an empty list
        return {}

    # next section is json structure
    if lines[0].startswith('{') and lines[-1].strip().endswith('}'):
        return json.loads('\n'.join(lines))

    # next section is structured text
    # LATER this is not the cleanest way to parse this. But it works for now
    result = {}
    n = 0
    powerstats_key = None
    while n < len(lines):
        line = lines[n].strip()

        if not line:
            n += 1
            continue

        if ':' in line:
            key, value = line.split(':', 1)
            key = key.strip()
            value = value.strip()

            if 'Powerstats' in key:
                # a new Powerstats entry starts; it is named after the first word of the value
                powerstats_key = value.split()[0]
                result.setdefault('Powerstats', {}).setdefault(powerstats_key, {})

            # all entries go to the current Powerstats entry once one has been opened
            section = result['Powerstats'][powerstats_key] if powerstats_key else result

            if value:
                # key, value entry
                section[key] = value
            else:
                # only a key, so the next lines are values
                section[key] = []
                n += 1
                while n < len(lines):
                    line = lines[n].strip()
                    if not line:  # end of section
                        break

                    if 'Thread' in key and 'crashed with ARM Thread State' in key:
                        # switch from list to dict only once, so that registers
                        # from successive lines accumulate instead of being reset
                        if section[key] == []:
                            section[key] = {}
                        section[key].update(CrashLogsParser.split_thread_crashes_with_arm_thread_state(line))
                    elif 'Binary Images' in key:
                        section[key].append(CrashLogsParser.split_binary_images(line))
                    elif 'Thread' in key:
                        section[key].append(CrashLogsParser.split_thread(line))
                    else:
                        section[key].append(line)
                    n += 1

        elif powerstats_key:
            # free-form line inside a Powerstats entry: keep the leading
            # indentation, hence rstrip() and not strip()
            result['Powerstats'][powerstats_key].setdefault('extra_data', []).append(lines[n].rstrip())

        elif line == 'EOF':
            break

        else:
            raise Exception(f"Parser bug: Unexpected line in crashlogs at line {n}. Line: {line}")

        n += 1

    return result

def parse_summary_file(path: str) -> list | dict:
logger.info(f"Parsing summary file: {path}")
result = []
Expand Down Expand Up @@ -154,6 +208,7 @@ def split_thread_crashes_with_arm_thread_state(line) -> dict:
result = {}
for i in range(0, len(elements), 2):
if not elements[i].endswith(':'):
result['error'] = ' '.join(elements[i:len(elements)])
break # last entry is not a valid key:value
result[elements[i][:-1]] = elements[i + 1]
return result
Expand All @@ -170,14 +225,17 @@ def split_thread(line) -> dict:
return result

def split_binary_images(line) -> dict:
    '''
    Split one line of a 'Binary Images' section into its fields.

    Parsing is regexp based because the image name may itself contain spaces
    (for example a version or an architecture after the name), so splitting
    on whitespace yields a varying number of columns.

    Expected layout:
        image_offset_start - image_offset_end image_name <uuid> path
    where image_offset_end is any token without whitespace (e.g. '???') and
    image_name is everything up to the opening '<' of the uuid.

    Args:
        line: a single line of the 'Binary Images' section.

    Returns:
        dict with the stripped values of 'image_offset_start',
        'image_offset_end', 'image_name', 'uuid' and 'path'.

    Raises:
        ValueError: the line does not follow the expected layout.
    '''
    m = re.search(r'\s*(\w+) -\s+([^\s]+)\s+([^<]+)<([^>]+)>\s+(.+)', line)
    if m is None:
        # fail with the offending line instead of an AttributeError on None
        raise ValueError(f"Unexpected binary image line: {line}")

    offset_start, offset_end, image_name, uuid, path = m.groups()
    result = {
        'image_offset_start': offset_start.strip(),
        'image_offset_end': offset_end.strip(),
        'image_name': image_name.strip(),
        'uuid': uuid.strip(),
        'path': path.strip(),
    }
    return result

Expand Down
63 changes: 63 additions & 0 deletions tests/test_parsers_crashlogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,69 @@ def test_parse_crashlogs(self):
for item in result:
self.assertTrue('timestamp' in item)

def test_header_section(self):
    '''Plain "key: value" header lines are returned as a flat dict of strings.'''
    # show the full diff when the comparison fails
    self.maxDiff = None
    header_lines = [
        'Date/Time: 2024-08-30 11:18:54.620 +0200',
        'End time: 2024-08-30 11:21:00.954 +0200',
        'OS Version: iPhone OS 17.6.1 (Build 21G93)',
        'Architecture: arm64e',
    ]
    wanted = {
        'Date/Time': '2024-08-30 11:18:54.620 +0200',
        'End time': '2024-08-30 11:21:00.954 +0200',
        'OS Version': 'iPhone OS 17.6.1 (Build 21G93)',
        'Architecture': 'arm64e',
    }
    parsed = CrashLogsParser.process_ips_lines(header_lines)
    self.assertEqual(parsed, wanted)

def test_split_thread(self):
    '''A thread backtrace line is split into id, image and offset fields.'''
    frames = [
        '0 libsystem_kernel.dylib 0x0000000123456789 0x123456000 + 123456',
    ]
    wanted = [
        {
            'id': '0',
            'image_name': 'libsystem_kernel.dylib',
            'image_base': '0x0000000123456789',
            'image_offset': '0x123456000',
            'symbol_offset': '123456',
        },
    ]
    # strict=True makes a mismatch between inputs and expectations an error
    for frame, expectation in zip(frames, wanted, strict=True):
        self.assertEqual(CrashLogsParser.split_thread(frame), expectation)

def test_split_binary_images(self):
    '''Binary image lines are split correctly for the three layouts covered below.'''
    image_lines = [
        # unknown end offset, image name followed by a version
        ' 0x123456000 - ??? com.apple.foo (1) <5BFC3EC3-2045-4F95-880A-DEC88832F639> /System/Library/bar',
        # plain name, dashed uuid
        ' 0x123456000 - 0x123456fff libhello <5BFC3EC3-2045-4F95-880A-DEC88832F639> /usr/lib/hello',
        # name followed by an architecture, undashed uuid, no leading space
        '0x123456000 - 0x123456fff FooBar arm64 <5BFC3EC320454F95880ADEC88832F639> /System/Library/bar',
    ]
    wanted = [
        {
            'image_offset_start': '0x123456000',
            'image_offset_end': '???',
            'image_name': 'com.apple.foo (1)',
            'uuid': '5BFC3EC3-2045-4F95-880A-DEC88832F639',
            'path': '/System/Library/bar',
        },
        {
            'image_offset_start': '0x123456000',
            'image_offset_end': '0x123456fff',
            'image_name': 'libhello',
            'uuid': '5BFC3EC3-2045-4F95-880A-DEC88832F639',
            'path': '/usr/lib/hello',
        },
        {
            'image_offset_start': '0x123456000',
            'image_offset_end': '0x123456fff',
            'image_name': 'FooBar arm64',
            'uuid': '5BFC3EC320454F95880ADEC88832F639',
            'path': '/System/Library/bar',
        },
    ]
    for image_line, expectation in zip(image_lines, wanted, strict=True):
        self.assertEqual(CrashLogsParser.split_binary_images(image_line), expectation)

def test_split_thread_crashes_with_arm_thread_state(self):
    '''Register dump lines become name/value dicts; trailing free text lands in 'error'.'''
    register_lines = [
        ' x0: 0x0000000000000012 x1: 0x0000000000000002 x2: 0x0000000123456789 x3: 0x0000000000000001',
        ' x28: 0x0000000000180000 fp: 0x0000000123456789 lr: 0x0000000123456789',
        ' sp: 0x0000000123456789 pc: 0x0000000123456789 cpsr: 0x40000000',
        ' esr: 0x12345667 Address size fault',
    ]
    wanted = [
        {
            'x0': '0x0000000000000012',
            'x1': '0x0000000000000002',
            'x2': '0x0000000123456789',
            'x3': '0x0000000000000001',
        },
        {
            'x28': '0x0000000000180000',
            'fp': '0x0000000123456789',
            'lr': '0x0000000123456789',
        },
        {
            'sp': '0x0000000123456789',
            'pc': '0x0000000123456789',
            'cpsr': '0x40000000',
        },
        {
            'esr': '0x12345667',
            'error': 'Address size fault',
        },
    ]
    for register_line, expectation in zip(register_lines, wanted, strict=True):
        parsed = CrashLogsParser.split_thread_crashes_with_arm_thread_state(register_line)
        self.assertEqual(parsed, expectation)


if __name__ == '__main__':
unittest.main()

0 comments on commit ac1bb41

Please sign in to comment.