-
Notifications
You must be signed in to change notification settings - Fork 2
/
update.py
125 lines (112 loc) · 3.97 KB
/
update.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import argparse
import json
import logging
import os
from datetime import date, timedelta
from pathlib import Path
from shutil import copy, rmtree
import update_cache
import update_consumer_data
import update_consumer_stats
import update_dataset
import update_package_list
import update_stats
import utils
# Logger for this module, named after the module's ``__name__``.
_LOGGER = logging.getLogger(__name__)
def check_file(value):
    """Convert *value* to a ``Path`` pointing at an existing regular file.

    Used as an ``argparse`` ``type=`` callable for an optional file argument.

    Args:
        value: Path-like value to validate, or ``None``.

    Returns:
        ``None`` when *value* is ``None``; otherwise ``Path(value)``.

    Raises:
        ValueError: If the path does not exist or is not a regular file. The
            offending ``Path`` is the exception's only argument.
    """
    if value is None:
        return None
    candidate = Path(value)
    if candidate.exists() and candidate.is_file():
        return candidate
    raise ValueError(candidate)
if __name__ == "__main__":
    # Script entry point: parse the command line, refresh the consumer data,
    # the package list, the cache and the dataset, then populate the build
    # directory with the generated statistics and the static site files.

    # Default reporting window: the 730 days ending yesterday.
    yesterday = date.today() - timedelta(days=1)
    two_years_earlier = yesterday - timedelta(days=365 * 2)

    parser = argparse.ArgumentParser(
        description="Update manylinux timeline",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    # (flags, keyword arguments) pairs, registered in this exact order so the
    # generated --help output keeps its layout.
    cli_options = (
        (
            ("-t", "--top-packages"),
            {
                "action": "store_true",
                "help": "check for new packages using manylinux wheels in top packages",
            },
        ),
        (
            ("--sethmlarson-pypi-data",),
            {
                "action": "store_true",
                "help": "check for new packages using manylinux wheels in sethmlarson/pypi-data",
            },
        ),
        (
            ("-s", "--start"),
            {
                "default": two_years_earlier,
                "type": date.fromisoformat,
                "help": "start date",
            },
        ),
        (
            ("-e", "--end"),
            {
                "default": yesterday,
                "type": date.fromisoformat,
                "help": "end date",
            },
        ),
        (
            ("--skip-cache",),
            {"action": "store_true", "help": "skip cache update"},
        ),
        (
            ("--bigquery-credentials",),
            {
                "type": check_file,
                "help": "path to bigquery credentials (enables bigquery)",
            },
        ),
        (
            ("-v", "--verbosity"),
            {"action": "count", "help": "increase output verbosity"},
        ),
    )
    for flags, options in cli_options:
        parser.add_argument(*flags, **options)
    cli = parser.parse_args()

    # No -v keeps WARNING, -v gives INFO, -vv (or more) gives DEBUG.
    verbosity = min(cli.verbosity or 0, 2)
    logging.basicConfig(level=logging.WARNING - 10 * verbosity)

    # Clamp the end of the window to yesterday and require a non-empty window.
    start = cli.start
    if cli.end > yesterday:
        end = yesterday
        _LOGGER.warning(
            f"end date ({cli.end}) adjusted to the default end date ({end})"
        )
    else:
        end = cli.end
    if start >= end:
        raise ValueError(f"{start} >= {end}")

    # Start from an empty build directory; the cache directory is kept.
    build_dir = utils.BUILD_PATH
    if build_dir.exists():
        rmtree(build_dir)
    build_dir.mkdir()
    utils.CACHE_PATH.mkdir(exist_ok=True)

    _LOGGER.debug("updating consumer data")
    consumer_dir = utils.ROOT_PATH / "consumer_data"
    update_consumer_data.update(consumer_dir, cli.bigquery_credentials)
    update_consumer_stats.update(consumer_dir, start, end)

    _LOGGER.debug("loading package list")
    packages_file = utils.ROOT_PATH / "packages.json"
    with open(packages_file) as stream:
        packages = json.load(stream)
    _LOGGER.debug(f"loaded {len(packages)} package names")

    use_top_packages = cli.top_packages
    use_sethmlarson_pypi_data = cli.sethmlarson_pypi_data
    # When GITHUB_EVENT_NAME is set, only the "schedule" event refreshes the
    # package list.
    event_name = os.environ.get("GITHUB_EVENT_NAME")
    skip_package_list_update = event_name not in (None, "schedule")
    if skip_package_list_update:
        _LOGGER.info(f"skip package list update for event '{event_name}'")
    elif event_name == "schedule":
        today = date.today()
        # A Wednesday (ISO weekday 3) on days 8-14 is the month's second
        # Wednesday: force both extra package sources on for that run.
        if today.isoweekday() == 3 and 8 <= today.day <= 14:
            use_top_packages = True
            use_sethmlarson_pypi_data = True

    if not skip_package_list_update:
        packages = update_package_list.update(
            packages,
            use_top_packages,
            use_sethmlarson_pypi_data,
            cli.bigquery_credentials,
        )
    if not cli.skip_cache:
        packages = update_cache.update(packages)
    packages, rows = update_dataset.update(packages)

    # Write the package list back, one entry per line, newline-terminated.
    with open(packages_file, "w") as stream:
        json.dump(packages, stream, indent=0)
        stream.write("\n")

    update_stats.update(rows, start, end)

    # Copy the static site files next to the generated output.
    for asset in ("index.html", "style.css", "favicon.ico", ".gitignore"):
        copy(utils.ROOT_PATH / asset, build_dir)