-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathtar2json
executable file
·73 lines (59 loc) · 1.85 KB
/
tar2json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env python3
#
# Copyright (c) 2017-2019 NVIDIA CORPORATION. All rights reserved.
# This file is part of webloader (see TBD).
# See the LICENSE file for licensing terms (BSD-style).
#
import argparse
import json
import sys
from itertools import islice
import yaml
from tarproclib import reader
# Text printed after the option list in --help (currently just a newline).
epilog = """
"""

# Command-line interface.
# The summary is passed as ``description``: the first positional parameter of
# ArgumentParser is ``prog``, so passing the summary positionally would make
# it replace the program name in the usage line.
parser = argparse.ArgumentParser(
    description="Extract parts of a tar file and output in YAML/JSON format.",
    epilog=epilog,
)
# Whitespace-separated list of sample keys to emit.
parser.add_argument("-k", "--keys", default="", help="keys to extract")
parser.add_argument(
    "-a", "--available", action="store_true", help="list available keys and exit"
)
parser.add_argument(
    "-f", "--format", default="yaml", help="output format (yaml, json, jsonlines)"
)
# When set, each raw sample is echoed to stderr before it is converted.
parser.add_argument("-v", "--verbose", action="store_true")
# When set, the "__key__" entry is not added to the extracted keys.
parser.add_argument("--nokey", action="store_true")
args = parser.parse_args()
# With -a/--available, print the sorted union of the keys seen in the leading
# samples and exit. This is handled before the -k check so that keys can be
# discovered without already having to name some.
if args.available:
    keyset = set()
    # Only the first 100 samples are inspected.
    # NOTE(review): "-" presumably makes TarIterator read from stdin - confirm.
    for sample in islice(reader.TarIterator("-"), 100):
        keyset.update(sample.keys())
    print(sorted(keyset))
    sys.exit(0)

# Keys to extract, given as one whitespace-separated string.
keys = args.keys.split()
if not keys:
    sys.exit("please specify keys to extract with the -k flag")
if not args.nokey:
    # Put the "__key__" entry first unless --nokey was requested.
    keys = ["__key__"] + keys
def decode(s):
    """Return ``s`` decoded as UTF-8 text when possible, else ``s`` unchanged.

    Values without a ``decode`` method (such as ``None`` or ``str``) and byte
    strings that are not valid UTF-8 are returned untouched.
    """
    try:
        return s.decode("utf-8")
    except (AttributeError, UnicodeDecodeError):
        # Best effort: pass through anything that cannot be decoded. Unlike a
        # bare ``except``, this does not swallow KeyboardInterrupt/SystemExit.
        return s
# Reject an unsupported --format up front, so the error is reported even when
# the input yields no samples and before any output has been written.
if args.format not in ("yaml", "json", "jsonlines"):
    sys.exit(f"{args.format}: unknown output format")

# Emit one record per sample produced by the tar iterator.
for sample in reader.TarIterator("-"):
    if args.verbose:
        print("SAMPLE:", repr(sample), file=sys.stderr)
    # Keys missing from the sample come out as None; values go through
    # decode(), which turns them into UTF-8 text when possible.
    result = {k: decode(sample.get(k, None)) for k in keys}
    if args.format == "yaml":
        # "---" starts a new YAML document for each sample.
        print("---")
        print(yaml.dump(result))
    elif args.format == "json":
        # Pretty-printed objects, each followed by a blank line.
        print(json.dumps(result, indent=4))
        print("")
    else:
        # jsonlines: json.dumps without ``indent`` produces a single line
        # (newlines inside strings are escaped), one record per output line.
        print(json.dumps(result))