Skip to content

Commit

Permalink
modernize compilation of Sudachi dictionaries
Browse files: browse the repository at this point in the history
  • Loading branch information
eiennohito committed Jul 31, 2023
1 parent 0e7a734 commit 462d569
Show file tree
Hide file tree
Showing 11 changed files with 277 additions and 154 deletions.
9 changes: 8 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ local.properties
# JDT-specific (Eclipse Java Development Tools)
.classpath

# IntelliJ

.idea/

### Java ###
# Compiled class file
*.class
Expand Down Expand Up @@ -177,4 +181,7 @@ gradle-app.setting
# Cache of project
.gradletasknamecache

src/main/text/*_lex.csv
src/main/text/*_lex.csv

# Python Virtual Environments for development
.venv*/
111 changes: 84 additions & 27 deletions build.gradle
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
plugins {
id 'java-library'
id "de.undercouch.download" version "5.4.0"
}

apply from: 'gradle/version.gradle'
Expand All @@ -9,48 +10,104 @@ repositories {
}

dependencies {
implementation 'com.worksap.nlp:sudachi:0.5.1'
implementation 'com.worksap.nlp:sudachi:0.7.2'
testImplementation 'junit:junit:4.13.1', 'org.hamcrest:hamcrest:2.1'
}

def dictionarySrcDir = 'src/main/text'
def dictionarySrc = [ small: ['small'], core: ['small', 'core'], full: ['small', 'core', 'notcore']]
def dictionarySrc = [ small: ['small'], core: ['small', 'core'], 'full': ['small', 'core', 'notcore']]

task unzipMatrixDef(type: Copy) {
def zipFile = file("${dictionarySrcDir}/matrix.def.zip")
def outputdir = file('build')
def downloadCacheDir = new File(project.buildDir, "cache")

def matrixDefDownload = tasks.register("downloadMatrixDef", Download) {
src "http://sudachi.s3-website-ap-northeast-1.amazonaws.com/sudachidict-raw/matrix.def.zip"
dest new File(downloadCacheDir, "matrix.def.zip")
overwrite false
}

def unzipMatrixDef = tasks.register('unzipMatrixDef', Copy) {
def zipFile = matrixDefDownload.get().outputs.files.singleFile
def outputDir = new File(project.buildDir, "dict/raw/matrix")
from zipTree(zipFile)
into outputdir
into outputDir
dependsOn(matrixDefDownload)
inputs.file(zipFile)
}

dictionarySrc.each {
def name = it.key
def taskName = "build${name.capitalize()}Dict"
def sources = it.value.collect { "${dictionarySrcDir}/${it}_lex.csv" }
task "${taskName}"(type: JavaExec) {
main = "com.worksap.nlp.sudachi.dictionary.DictionaryBuilder"
classpath = sourceSets.main.runtimeClasspath
args('-o', "build/system_${name}.dic", '-m', 'build/matrix.def', '-d', "${version}", *sources)
maxHeapSize = "4g"
dependsOn unzipMatrixDef
def rawDictSrcs = dictionarySrc.collectMany { it.value }.toSet()

rawDictSrcs.forEach { name ->
final String capitalName = name.capitalize()
final String version = project.property("dict.version").toString()

def downloadTask = tasks.register("download${capitalName}Zip", Download) {
def filename = "${name}_lex.zip"
src "http://sudachi.s3-website-ap-northeast-1.amazonaws.com/sudachidict-raw/${version}/$filename"
dest new File(downloadCacheDir, "${version}/$name/$filename")
overwrite false
inputs.property("version", version)
}

tasks.register("unzip${capitalName}Dict", Copy) {
def zipFile = downloadTask.get().outputs.files.singleFile
def outputDir = new File(project.buildDir, "dict/raw/$version/$name")
from zipTree(zipFile)
into outputDir
dependsOn(downloadTask)
}
test.dependsOn taskName
}

dictionarySrc.each {
def builtDictDir = new File(project.buildDir, "dict/bin/${property("dict.version")}")

dictionarySrc.entrySet().forEach { e ->
def capitalName = e.key.capitalize()
def version = project.property("dict.version").toString()
def name = e.key

def compileTask = tasks.register("compile${capitalName}Dict", JavaExec) { t ->
def sources = e.value.collect { source ->
def srcName = source.toString().capitalize()
t.dependsOn(tasks.named("unzip${srcName}Dict"))
def dictCsv = new File(project.buildDir, "dict/raw/$version/$source/${source}_lex.csv")
t.inputs.file(dictCsv)
dictCsv
}

def outputFile = new File(builtDictDir, "system_${name}.dic")
def matrixFile = new File(unzipMatrixDef.get().outputs.files.singleFile, "matrix.def")

t.mainClass.set("com.worksap.nlp.sudachi.dictionary.DictionaryBuilder")
t.classpath = sourceSets.main.runtimeClasspath
t.args('-o', outputFile.toString(), '-m', matrixFile, '-d', "${version}", *sources)
t.maxHeapSize = "4g"
t.dependsOn unzipMatrixDef
t.systemProperty('file.encoding', 'UTF-8')
t.inputs.file(matrixFile)
t.outputs.file(outputFile)
}
test.dependsOn(compileTask)
}

dictionarySrc.entrySet().forEach {
def name = it.key
def taskName = "zip${name.capitalize()}Dict"
task "${taskName}"(type: Zip) {
def task = tasks.register("zip${name.capitalize()}Dict", Zip) {
def compileTask = tasks.getByName("compile${name.capitalize()}Dict")
def compiledDict = compileTask.outputs.files.singleFile

archiveBaseName = 'sudachi-dictionary'
archiveVersion = version
archiveVersion.set(version)
archiveClassifier = name
from "build/system_${name}.dic", 'LEGAL', 'LICENSE-2.0.txt'
from compiledDict, 'LEGAL', 'LICENSE-2.0.txt'
into "sudachi-dictionary-${version}"
dependsOn "build${name.capitalize()}Dict"
dependsOn(compileTask)
}
build.dependsOn taskName
tasks.named('build').configure { dependsOn(task) }
}

tasks.withType(Test) {
systemProperty('buildDirectory', 'build')
tasks.withType(JavaCompile).configureEach {
options.encoding = 'UTF-8'
}

tasks.withType(Test).configureEach {
systemProperty('buildDirectory', builtDictDir.toString())
systemProperty('file.encoding', 'UTF-8')
}
1 change: 1 addition & 0 deletions gradle.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
dict.version=20230110
2 changes: 1 addition & 1 deletion gradle/version.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ version = describedCommit +
(describedCommit.matches(".*-[0-9]+-g[0-9a-f]{7}") ? "-SNAPSHOT" : "") +
(git.status().isClean() ? "" : "+dirty")

task showVersion {
tasks.register("showVersion") {
doLast {
println version
}
Expand Down
Binary file modified gradle/wrapper/gradle-wrapper.jar
Binary file not shown.
3 changes: 2 additions & 1 deletion gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-6.7-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-8.2.1-bin.zip
networkTimeout=10000
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
Loading

0 comments on commit 462d569

Please sign in to comment.