#!/usr/bin/env python3
"""
Merges multiple 'calories' databases together.

Merges entries and weights from the currently active database and any
database(s) passed as arguments, removing duplicates. The resulting
entries are imported into the currently active database using
'calories import'.

For food, a duplicate is determined using the tuple:
    (created, entryDate, calories, food)
i.e., ignoring bmr/amr, since those may differ because of
inconsistencies between weights across databases.

For weights, duplicates are determined using the tuple:
    (created, weight)

IDs may change during the merge process.
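
Example invocation (paths are illustrative):
    calmerge ~/backups/calories.db ~/old-laptop/calories.db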
"""
import os
import sys
import json
import logging
import argparse
import shlex
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import List, NamedTuple, Dict, Any, Optional, Set, Tuple
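
# resolve the 'calories' binary up front; the subprocess calls below assume it exists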
calories_path: Optional[str] = shutil.which("calories")
if calories_path is None:
    raise RuntimeError("Couldn't find 'calories' on your $PATH")

Json = Dict[str, Any]


def getLogger(debug: bool = False) -> logging.Logger:
lgr = logging.getLogger("calmerge")
# early return if the logger has already been configured
if lgr.handlers:
return lgr
if debug:
lgr.setLevel(logging.DEBUG)
else:
lgr.setLevel(logging.INFO)
ch = logging.StreamHandler()
fmt = logging.Formatter("%(message)s")
ch.setFormatter(fmt)
lgr.addHandler(ch)
    return lgr


class Options(NamedTuple):
    dbs: List[str]
    debug: bool
    skip_backup: bool


def parse_args() -> Options:
parser = argparse.ArgumentParser(
prog="calmerge",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=__doc__,
)
parser.add_argument(
"CALORIES_DATABASE",
help="One or more calories databases to use as input",
metavar="CALORIES_DATABASE",
nargs="+",
)
parser.add_argument(
"-d", "--debug", action="store_true", help="Increase log verbosity"
)
    parser.add_argument(
        "--skip-backup", action="store_true", help="Don't back up the active database"
    )
args = parser.parse_args()
dbs: List[str] = args.CALORIES_DATABASE
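    # nargs="+" already makes argparse reject an empty list, so this is just a
    # defensive check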
if len(dbs) < 1:
print("Must provide at least one databases to merge...", file=sys.stderr)
sys.exit(1)
for db in dbs:
if not os.path.exists(db):
print(f'"{db}" does not exist', file=sys.stderr)
sys.exit(1)
return Options(
dbs=[os.path.abspath(db) for db in dbs],
debug=args.debug,
skip_backup=args.skip_backup,
    )


def caloriesconf() -> Path:
    """Returns the path of the file where 'calories' records its active database"""
    if "GOBIN" not in os.environ:
        print("The 'GOBIN' environment variable must be set", file=sys.stderr)
        sys.exit(1)
    return Path(os.environ["GOBIN"]) / ".caloriesconf"


def set_caloriesconf(set_to: str) -> None:
"""
Update the .caloriesconf file to the passed path
"""
if not os.path.exists(set_to):
print(
f"Could not set .caloriesconf to {set_to}, path does not exist",
file=sys.stderr,
)
sys.exit(1)
with caloriesconf().open("w") as f:
        f.write(set_to)


def calories_backup(db: str) -> None:
    """
    Saves a copy of the active database, in case something terrible happens during merging
"""
logger = getLogger()
backup: str = db + ".bak"
    logger.info(f'Saving a backup of the active database to "{backup}"')
    shutil.copyfile(db, backup)


def calories_export(from_db: Optional[str] = None) -> Json:
"""
    Runs 'calories export' and returns the parsed JSON.
    If no database is passed, uses the currently active database
"""
logger = getLogger()
if from_db is not None:
set_caloriesconf(from_db)
logger.info(f"Exporting information from {str(from_db)}")
else:
logger.info("Exporting information from currently active database")
proc: subprocess.CompletedProcess = subprocess.run(
[calories_path, "export"], # type: ignore[list-item]
encoding="utf-8",
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
    if proc.returncode != 0:
        print(proc.stderr, file=sys.stderr)
        raise RuntimeError("'calories export' returned non-zero exit code")
json_cals: Json = json.loads(proc.stdout)
logger.debug(
f"Read {len(json_cals['entries'])} entries and {len(json_cals['weights'])} weights"
)
    return json_cals


def merge_data(clr_data: Dict[str, Any], use_conf_from: str) -> Json:
"""
    Takes the exported data from multiple databases and the path of the database
    whose configuration should be used.
    Removes duplicate entries and weights, and returns the corresponding dictionary,
    which can be encoded to JSON and used for 'calories import'
"""
logger = getLogger()
final_data: Json = {}
logger.debug("Using config from {}".format(use_conf_from))
final_data["config"] = clr_data[use_conf_from]["config"]
final_data["entries"] = []
final_data["weights"] = []
    # compare items by key tuples that exclude the 'id', so identical rows match across dbs
entry_set: Set[Tuple[str, str, int, str]] = set()
weight_set: Set[Tuple[str, float]] = set()
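    # illustrative entry key: ("2021-03-01 09:30:00", "2021-03-01", 350, "oatmeal");
    # seeing an identical tuple again (e.g. from another database) marks a duplicate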
for path, db_data in clr_data.items():
logger.debug(f"{path}: Processing entries (food)")
# entries
entry_skipped: int = 0
for item in db_data["entries"]:
ekey = (item["created"], item["entryDate"], item["calories"], item["food"])
if ekey in entry_set:
entry_skipped += 1
continue
entry_set.add(ekey)
final_data["entries"].append(item)
logger.debug(
f"{path}: skipped {entry_skipped} of {len(db_data['entries'])} entries"
)
# weights
logger.debug(f"{path}: Processing weights")
weight_skipped: int = 0
for item in db_data["weights"]:
wkey = (item["created"], item["weight"])
if wkey in weight_set:
weight_skipped += 1
continue
weight_set.add(wkey)
final_data["weights"].append(item)
logger.debug(
f"{path}: skipped {weight_skipped} of {len(db_data['weights'])} weights"
)
    # sort entries and weights by the 'created' string (dates in this format
    # sort chronologically when compared alphabetically)
final_data["entries"] = sorted(final_data["entries"], key=lambda e: e["created"])
final_data["weights"] = sorted(final_data["weights"], key=lambda w: w["created"])
    # rewrite ids on the dictionaries. these should be close to (but may not
    # always match) the ids in the original databases, since dictionaries are
    # ordered in recent python versions and are traversed in insertion order
    for new_id, entry in enumerate(final_data["entries"], 1):
        entry["id"] = new_id
    for new_id, weight in enumerate(final_data["weights"], 1):
        weight["id"] = new_id
    return final_data


def calories_import(merged_data: Json) -> None:
logger = getLogger()
logger.info("Merging into active database ({})".format(caloriesconf().read_text()))
tf = tempfile.NamedTemporaryFile(mode="w", delete=False)
logger.debug(f"Using temporary file {tf.name} to run 'calories import'")
json.dump(merged_data, tf, indent=4)
tf.flush()
tf.close()
cmd_parts: List[str] = shlex.split(f"calories import --f={tf.name}")
proc: subprocess.CompletedProcess = subprocess.run(
cmd_parts,
encoding="utf-8",
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
print(proc.stdout)
if proc.returncode != 0:
print(proc.stderr)
raise RuntimeError("'calories import' returned non-zero exit code")
def main() -> None:
opts: Options = parse_args()
logger = getLogger(opts.debug)
# save what caloriesconf contains when this is initially run
orig_conf: str = caloriesconf().read_text()
logger.debug(f"caloriesconf originally contained: {orig_conf}")
    if not opts.skip_backup:
        calories_backup(orig_conf)
# map of filenames to 'calories export' JSON data
    clr_data: Dict[str, Json] = {}
clr_data[orig_conf] = calories_export()
for db in opts.dbs:
clr_data[db] = calories_export(db)
# set caloriesconf back to the original, so 'calories import'
# saves merged info back to the active database
set_caloriesconf(orig_conf)
# merge the json, removing duplicates
final_data: Json = merge_data(clr_data, orig_conf)
    calories_import(final_data)


if __name__ == "__main__":
main()