Skip to content

Commit

Permalink
add script to upload converted dictionaries to S3
Browse files Browse the repository at this point in the history
  • Loading branch information
eiennohito committed Aug 3, 2023
1 parent 32420e5 commit d2a14dc
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 13 deletions.
3 changes: 2 additions & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
dict.version=20230110
dict.version=20230110
dict.release=false
27 changes: 21 additions & 6 deletions gradle/version.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,29 @@ buildscript {
dependencies.classpath 'org.ajoberstar:grgit:1.7.2'
}

ext {
git = org.ajoberstar.grgit.Grgit.open(file('.'))
describedCommit = git.describe().toString().trim().replaceFirst('\\Av', '')
def commitFromGit() {
    // Build a version string from `git describe` for the repository at '.'.
    def repo = org.ajoberstar.grgit.Grgit.open(file('.'))
    // Drop a single leading 'v' from the described commit.
    def described = repo.describe().toString().trim().replaceFirst('\\Av', '')
    // A "-<number>-g<7 hex digits>" tail gets the -SNAPSHOT suffix.
    def snapshotMarker = described.matches(".*-[0-9]+-g[0-9a-f]{7}") ? "-SNAPSHOT" : ""
    // A working tree whose status is not clean gets the +dirty suffix.
    def dirtyMarker = repo.status().isClean() ? "" : "+dirty"
    return described + snapshotMarker + dirtyMarker
}

def versionString() {
    // When dict.release is the string "true" the dict.version property is used
    // verbatim; otherwise the version is computed by commitFromGit().
    return (property("dict.release") == "true") ? property("dict.version") : commitFromGit()
}

// First assignment: describedCommit, plus "-SNAPSHOT" when it matches the
// "-<number>-g<7 hex digits>" tail, plus "+dirty" when the git status is not clean.
// NOTE(review): this looks like the pre-change line of the diff shown next to its
// replacement below - confirm that only one of the two assignments exists in the real file.
version = describedCommit +
(describedCommit.matches(".*-[0-9]+-g[0-9a-f]{7}") ? "-SNAPSHOT" : "") +
(git.status().isClean() ? "" : "+dirty")
// Second assignment: the project version is whatever versionString() returns.
version = versionString()

tasks.register("showVersion") {
doLast {
Expand Down
7 changes: 1 addition & 6 deletions scripts/01_upload_raw_dictionaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from classopt import classopt

from aws_common import CredentialCache
from fs_common import validate_file
from raw_listing import generate_raw_listing


Expand All @@ -17,12 +18,6 @@ class Opts:
s3_prefix: str = "sudachidict-raw"


def validate_file(file: Path) -> Path:
    """Return ``file`` unchanged if something exists at that path.

    Raises:
        FileNotFoundError: if ``file`` does not exist.
    """
    if not file.exists():
        raise FileNotFoundError(f"required file {file} was not present")
    return file


def validate_files(args: Opts) -> list[Path]:
return [
validate_file(args.input / "small_lex.zip"),
Expand Down
70 changes: 70 additions & 0 deletions scripts/02_upload_compiled_dictionaries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import re
from pathlib import Path

from classopt import classopt, config

from aws_common import CredentialCache


# Options for the compiled-dictionary upload script; the entry point builds an
# instance with Opts.from_args().
@classopt(default_long=True)
class Opts:
    # Directory whose direct children are scanned for compiled dictionary archives.
    input: Path
    # Passed as the first argument to CredentialCache.
    aws_profile: str = None
    # Passed as the second argument to CredentialCache.
    aws_mfa: str = None
    # NOTE(review): not read by any function visible in this script - confirm it is still needed.
    aws_region: str = "ap-northeast-1"
    # Name of the bucket that objects are put into.
    s3_bucket: str = "sudachi"
    # Prefix placed, followed by "/", in front of every object key.
    s3_prefix: str = "sudachidict"
    # When set, the "latest" redirect objects are not written.
    no_latest: bool = config(action='store_true')


# File names of compiled dictionary archives:
# "sudachi-dictionary-<anything>-<kind>.zip" with <kind> one of small, core, full.
# The dot before "zip" is escaped so only a literal ".zip" extension is accepted.
BINARY_DIC_PATTERN = re.compile(r"^sudachi-dictionary-.*-(small|core|full)\.zip$")


def make_latest_name(name: str) -> str:
    """Return the "latest" alias for a compiled dictionary archive name.

    The part between ``sudachi-dictionary-`` and the dictionary kind is
    replaced with ``latest``; the kind (small, core or full) is kept.

    Args:
        name: File name of the archive; must match ``BINARY_DIC_PATTERN``.

    Returns:
        ``sudachi-dictionary-latest-<kind>.zip``.

    Raises:
        ValueError: if ``name`` does not match ``BINARY_DIC_PATTERN``.
    """
    m = BINARY_DIC_PATTERN.match(name)
    if m is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f"{name!r} is not a compiled dictionary archive name")
    return f"sudachi-dictionary-latest-{m.group(1)}.zip"


def prepare_files(args: Opts) -> list[Path]:
    """Collect the compiled dictionary archives located directly in ``args.input``.

    Only regular files whose name matches ``BINARY_DIC_PATTERN`` are returned,
    in the order ``iterdir()`` yields them.
    """
    return [
        entry
        for entry in args.input.iterdir()
        if entry.is_file() and BINARY_DIC_PATTERN.match(entry.name) is not None
    ]


def upload_files(s3, args: Opts, files: list[Path]):
    """Put every file into the configured bucket and, optionally, a "latest" alias.

    Each file is stored under ``<args.s3_prefix>/<file name>`` with content type
    ``application/zip``.  Unless ``args.no_latest`` is set, an empty object named
    by ``make_latest_name`` is also written with ``WebsiteRedirectLocation``
    pointing at the key that was just uploaded.

    Args:
        s3: Object providing ``Bucket(name)``; the returned bucket must provide
            ``put_object``.
        args: Options supplying ``s3_bucket``, ``s3_prefix`` and ``no_latest``.
        files: Paths of the files to upload.
    """
    target = s3.Bucket(args.s3_bucket)
    for file in files:
        s3_key = f"{args.s3_prefix}/{file.name}"
        with file.open('rb') as stream:
            uploaded = target.put_object(
                Body=stream,
                Key=s3_key,
                ContentType='application/zip',
            )
            print("put", file, "size", uploaded.content_length, "to", s3_key, "etag", uploaded.e_tag)

        if args.no_latest:
            continue

        # Empty placeholder object that redirects to the uploaded archive.
        alias_name = make_latest_name(file.name)
        alias_key = f"{args.s3_prefix}/{alias_name}"
        target.put_object(
            Body=b"",
            Key=alias_key,
            WebsiteRedirectLocation="/" + s3_key,
        )
        print("set", alias_key, "redirect to", s3_key)


def main(opts: Opts):
    """Obtain a session, find the dictionary archives and upload them.

    NOTE(review): ``opts.aws_region`` is not read here - confirm whether the
    session already carries the intended region.
    """
    credentials = CredentialCache(opts.aws_profile, opts.aws_mfa)
    aws_session = credentials.session
    dictionaries = prepare_files(opts)
    upload_files(aws_session.resource("s3"), opts, dictionaries)


# Script entry point: build the options with Opts.from_args() and run main().
if __name__ == '__main__':
    main(Opts.from_args())
7 changes: 7 additions & 0 deletions scripts/fs_common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from pathlib import Path


def validate_file(file: Path) -> Path:
    """Return ``file`` unchanged if something exists at that path.

    Raises:
        FileNotFoundError: if ``file`` does not exist.
    """
    if not file.exists():
        raise FileNotFoundError(f"required file {file} was not present")
    return file

0 comments on commit d2a14dc

Please sign in to comment.