diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 11bff58025..7373178e61 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -15,8 +15,7 @@ # Backend services owners /packages/server/ @dgauldie @dvince2 @YohannParis /packages/db-mirgation/ @dgauldie @dvince2 -/packages/taskrunner/ @kbirk @YohannParis @mwdchang -/packages/esingest/ @kbirk @j2whiting +/packages/taskrunner/ @kbirk @YohannParis # Documentation owners docs/* @dgauldie @pascaleproulx @YohannParis @dvince2 diff --git a/.vscode/launch.json b/.vscode/launch.json index 0a61aa5545..d6416e6dfe 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -57,16 +57,6 @@ "args": [ "--spring.profiles.active=default,local,ide" ] - }, - { - "type": "java", - "name": "ElasticIngestApplication", - "request": "launch", - "mainClass": "software.uncharted.terarium.esingest.ElasticIngestApplication", - "projectName": "es-ingest", - "args": [ - "--spring.profiles.active=default,local,ide" - ] } ] } diff --git a/packages/es-ingest/.gitignore b/packages/es-ingest/.gitignore deleted file mode 100644 index e177d123d9..0000000000 --- a/packages/es-ingest/.gitignore +++ /dev/null @@ -1,40 +0,0 @@ -### secrets ### -/src/main/resources/application-secrets.properties - -HELP.md -.gradle -build/ -!gradle/wrapper/gradle-wrapper.jar -!**/src/main/**/build/ -!**/src/test/**/build/ - -### STS ### -.apt_generated -.classpath -.factorypath -.project -.settings -.springBeans -.sts4-cache -bin/ -!**/src/main/**/bin/ -!**/src/test/**/bin/ - -### IntelliJ IDEA ### -.idea -*.iws -*.iml -*.ipr -out/ -!**/src/main/**/out/ -!**/src/test/**/out/ - -### NetBeans ### -/nbproject/private/ -/nbbuild/ -/dist/ -/nbdist/ -/.nb-gradle/ - -### VS Code ### -.vscode/ diff --git a/packages/es-ingest/README.md b/packages/es-ingest/README.md deleted file mode 100644 index 429f0dcfe0..0000000000 --- a/packages/es-ingest/README.md +++ /dev/null @@ -1,103 +0,0 @@ -# Terarium Elasticsearch Ingest - -This package is designed to quickly import source documents along with their embeddings for knn semantic search in Elasticsearch. - -## How to setup an ingest: - -### Create the input and output class definitions: - -An ingest requires an input class that implements the `IInputDocument` interface and output class that implements the `IOutputDocument` interface. -```java -@Data -@JsonIgnoreProperties(ignoreUnknown = true) -public class ExampleInputDocument implements IInputDocument { - UUID id; - String title; - String body; -} - -@Data -@JsonIgnoreProperties(ignoreUnknown = true) -public class ExampleOutputDocument implements IOutputDocument { - UUID id; - String title; - String body; - List topics; - - void addTopics(List topics) { - if (topics == null) { - topics = new ArrayList<>(); - } - topics.addAll(ts); - } - -} - -``` - -### Create an `IElasticPass` implementation: - -```java - -public class ExampleInsertPass implements IElasticPass { - - public void setup(final ElasticIngestParams params) { - } - - public void teardown(final ElasticIngestParams params) { - } - - public String message() { - return "Inserting documents"; - } - - public IInputIterator getIterator(final ElasticIngestParams params) throws IOException { - return new JSONLineIterator<>(Paths.get(params.getInputDir()), ExampleInputDocument.class, params.getBatchSize()); - } - - public List process(List input) { - List res = new ArrayList<>(); - for (ExampleInputDocument in : input) { - ExampleOutputDocument doc = new ExampleOutputDocument(); - doc.setId(in.getId()); - doc.setTitle(in.getSource().getTitle()); - doc.setFullText(input.getBody()); - res.add(doc); - } - return res; - } -} - -``` - -### Create an `IElasticIngest` implementation: - -Each ingest will require some logic to convert the `input` types to output types, this is done by implementing the `IElasticIngest` interface: - -```java -public class ExampleIngest implements IElasticIngest { - - public void setup(final ElasticIngestParams params) { - } - - public void teardown(final ElasticIngestParams params) { - } - - public List> getPasses() { - return List.of(new ExampleInsertPass()); - } - -} -``` - -### Configuring the ingest in `application.properties`: - -Add an ingest entry to the `application.properties`: - -``` -terarium.esingest.ingestParams[0].name="A sample ingest" -terarium.esingest.ingestParams[0].inputDir=/path/to/source/dir -terarium.esingest.ingestParams[0].topics=some,topics,to,add,to,each,doc -terarium.esingest.ingestParams[0].outputIndexRoot=example -terarium.esingest.ingestParams[0].ingestClass=software.uncharted.terarium.esingest.ingests.ExampleIngest -``` diff --git a/packages/es-ingest/build.gradle b/packages/es-ingest/build.gradle deleted file mode 100644 index ed966fa711..0000000000 --- a/packages/es-ingest/build.gradle +++ /dev/null @@ -1,52 +0,0 @@ -plugins { - id 'java' - id 'org.springframework.boot' version '3.1.5' - id 'io.spring.dependency-management' version '1.1.4' -} - -group = 'software.uncharted' -version = '1.0.0-SNAPSHOT' -sourceCompatibility = '17' - -apply plugin: 'idea' - -configurations { - compileOnly { - extendsFrom annotationProcessor - } -} - -project.ext { - artifactName = 'es-ingest' - description = 'imports models into es' -} - -repositories { - mavenCentral() -} - -dependencies { - implementation 'org.springframework:spring-web' - implementation 'co.elastic.clients:elasticsearch-java:8.8.1' - implementation 'org.elasticsearch.client:elasticsearch-rest-high-level-client:7.17.19' - implementation 'org.springframework.boot:spring-boot-starter' - implementation 'com.fasterxml.jackson.core:jackson-databind:2.14.2' - implementation 'commons-io:commons-io:2.8.0' - implementation(platform("software.amazon.awssdk:bom:2.21.26")) - implementation("software.amazon.awssdk:s3") - - compileOnly 'org.projectlombok:lombok' - developmentOnly 'org.springframework.boot:spring-boot-devtools' - annotationProcessor 'org.projectlombok:lombok' - testImplementation 'org.springframework.boot:spring-boot-starter-test' - testAnnotationProcessor 'org.projectlombok:lombok' - testCompileOnly 'org.projectlombok:lombok' -} - -tasks.named('test') { - useJUnitPlatform() -} - -dependencyLocking { - lockAllConfigurations() -} diff --git a/packages/es-ingest/gradle/wrapper/gradle-wrapper.jar b/packages/es-ingest/gradle/wrapper/gradle-wrapper.jar deleted file mode 100644 index d64cd49177..0000000000 Binary files a/packages/es-ingest/gradle/wrapper/gradle-wrapper.jar and /dev/null differ diff --git a/packages/es-ingest/gradle/wrapper/gradle-wrapper.properties b/packages/es-ingest/gradle/wrapper/gradle-wrapper.properties deleted file mode 100644 index 1af9e0930b..0000000000 --- a/packages/es-ingest/gradle/wrapper/gradle-wrapper.properties +++ /dev/null @@ -1,7 +0,0 @@ -distributionBase=GRADLE_USER_HOME -distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.5-bin.zip -networkTimeout=10000 -validateDistributionUrl=true -zipStoreBase=GRADLE_USER_HOME -zipStorePath=wrapper/dists diff --git a/packages/es-ingest/gradlew b/packages/es-ingest/gradlew deleted file mode 100755 index 1aa94a4269..0000000000 --- a/packages/es-ingest/gradlew +++ /dev/null @@ -1,249 +0,0 @@ -#!/bin/sh - -# -# Copyright © 2015-2021 the original authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -############################################################################## -# -# Gradle start up script for POSIX generated by Gradle. -# -# Important for running: -# -# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is -# noncompliant, but you have some other compliant shell such as ksh or -# bash, then to run this script, type that shell name before the whole -# command line, like: -# -# ksh Gradle -# -# Busybox and similar reduced shells will NOT work, because this script -# requires all of these POSIX shell features: -# * functions; -# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», -# «${var#prefix}», «${var%suffix}», and «$( cmd )»; -# * compound commands having a testable exit status, especially «case»; -# * various built-in commands including «command», «set», and «ulimit». -# -# Important for patching: -# -# (2) This script targets any POSIX shell, so it avoids extensions provided -# by Bash, Ksh, etc; in particular arrays are avoided. -# -# The "traditional" practice of packing multiple parameters into a -# space-separated string is a well documented source of bugs and security -# problems, so this is (mostly) avoided, by progressively accumulating -# options in "$@", and eventually passing that to Java. -# -# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, -# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; -# see the in-line comments for details. -# -# There are tweaks for specific operating systems such as AIX, CygWin, -# Darwin, MinGW, and NonStop. -# -# (3) This script is generated from the Groovy template -# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt -# within the Gradle project. -# -# You can find Gradle at https://github.com/gradle/gradle/. -# -############################################################################## - -# Attempt to set APP_HOME - -# Resolve links: $0 may be a link -app_path=$0 - -# Need this for daisy-chained symlinks. -while - APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path - [ -h "$app_path" ] -do - ls=$( ls -ld "$app_path" ) - link=${ls#*' -> '} - case $link in #( - /*) app_path=$link ;; #( - *) app_path=$APP_HOME$link ;; - esac -done - -# This is normally unused -# shellcheck disable=SC2034 -APP_BASE_NAME=${0##*/} -# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) -APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit - -# Use the maximum available, or set MAX_FD != -1 to use that value. -MAX_FD=maximum - -warn () { - echo "$*" -} >&2 - -die () { - echo - echo "$*" - echo - exit 1 -} >&2 - -# OS specific support (must be 'true' or 'false'). -cygwin=false -msys=false -darwin=false -nonstop=false -case "$( uname )" in #( - CYGWIN* ) cygwin=true ;; #( - Darwin* ) darwin=true ;; #( - MSYS* | MINGW* ) msys=true ;; #( - NONSTOP* ) nonstop=true ;; -esac - -CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar - - -# Determine the Java command to use to start the JVM. -if [ -n "$JAVA_HOME" ] ; then - if [ -x "$JAVA_HOME/jre/sh/java" ] ; then - # IBM's JDK on AIX uses strange locations for the executables - JAVACMD=$JAVA_HOME/jre/sh/java - else - JAVACMD=$JAVA_HOME/bin/java - fi - if [ ! -x "$JAVACMD" ] ; then - die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." - fi -else - JAVACMD=java - if ! command -v java >/dev/null 2>&1 - then - die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." - fi -fi - -# Increase the maximum file descriptors if we can. -if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then - case $MAX_FD in #( - max*) - # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. - # shellcheck disable=SC2039,SC3045 - MAX_FD=$( ulimit -H -n ) || - warn "Could not query maximum file descriptor limit" - esac - case $MAX_FD in #( - '' | soft) :;; #( - *) - # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. - # shellcheck disable=SC2039,SC3045 - ulimit -n "$MAX_FD" || - warn "Could not set maximum file descriptor limit to $MAX_FD" - esac -fi - -# Collect all arguments for the java command, stacking in reverse order: -# * args from the command line -# * the main class name -# * -classpath -# * -D...appname settings -# * --module-path (only if needed) -# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. - -# For Cygwin or MSYS, switch paths to Windows format before running java -if "$cygwin" || "$msys" ; then - APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) - CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) - - JAVACMD=$( cygpath --unix "$JAVACMD" ) - - # Now convert the arguments - kludge to limit ourselves to /bin/sh - for arg do - if - case $arg in #( - -*) false ;; # don't mess with options #( - /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath - [ -e "$t" ] ;; #( - *) false ;; - esac - then - arg=$( cygpath --path --ignore --mixed "$arg" ) - fi - # Roll the args list around exactly as many times as the number of - # args, so each arg winds up back in the position where it started, but - # possibly modified. - # - # NB: a `for` loop captures its iteration list before it begins, so - # changing the positional parameters here affects neither the number of - # iterations, nor the values presented in `arg`. - shift # remove old arg - set -- "$@" "$arg" # push replacement arg - done -fi - - -# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' - -# Collect all arguments for the java command: -# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, -# and any embedded shellness will be escaped. -# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be -# treated as '${Hostname}' itself on the command line. - -set -- \ - "-Dorg.gradle.appname=$APP_BASE_NAME" \ - -classpath "$CLASSPATH" \ - org.gradle.wrapper.GradleWrapperMain \ - "$@" - -# Stop when "xargs" is not available. -if ! command -v xargs >/dev/null 2>&1 -then - die "xargs is not available" -fi - -# Use "xargs" to parse quoted args. -# -# With -n1 it outputs one arg per line, with the quotes and backslashes removed. -# -# In Bash we could simply go: -# -# readarray ARGS < <( xargs -n1 <<<"$var" ) && -# set -- "${ARGS[@]}" "$@" -# -# but POSIX shell has neither arrays nor command substitution, so instead we -# post-process each arg (as a line of input to sed) to backslash-escape any -# character that might be a shell metacharacter, then use eval to reverse -# that process (while maintaining the separation between arguments), and wrap -# the whole thing up as a single "set" statement. -# -# This will of course break if any of these variables contains a newline or -# an unmatched quote. -# - -eval "set -- $( - printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | - xargs -n1 | - sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | - tr '\n' ' ' - )" '"$@"' - -exec "$JAVACMD" "$@" diff --git a/packages/es-ingest/gradlew.bat b/packages/es-ingest/gradlew.bat deleted file mode 100644 index 6689b85bee..0000000000 --- a/packages/es-ingest/gradlew.bat +++ /dev/null @@ -1,92 +0,0 @@ -@rem -@rem Copyright 2015 the original author or authors. -@rem -@rem Licensed under the Apache License, Version 2.0 (the "License"); -@rem you may not use this file except in compliance with the License. -@rem You may obtain a copy of the License at -@rem -@rem https://www.apache.org/licenses/LICENSE-2.0 -@rem -@rem Unless required by applicable law or agreed to in writing, software -@rem distributed under the License is distributed on an "AS IS" BASIS, -@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -@rem See the License for the specific language governing permissions and -@rem limitations under the License. -@rem - -@if "%DEBUG%"=="" @echo off -@rem ########################################################################## -@rem -@rem Gradle startup script for Windows -@rem -@rem ########################################################################## - -@rem Set local scope for the variables with windows NT shell -if "%OS%"=="Windows_NT" setlocal - -set DIRNAME=%~dp0 -if "%DIRNAME%"=="" set DIRNAME=. -@rem This is normally unused -set APP_BASE_NAME=%~n0 -set APP_HOME=%DIRNAME% - -@rem Resolve any "." and ".." in APP_HOME to make it shorter. -for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi - -@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" - -@rem Find java.exe -if defined JAVA_HOME goto findJavaFromJavaHome - -set JAVA_EXE=java.exe -%JAVA_EXE% -version >NUL 2>&1 -if %ERRORLEVEL% equ 0 goto execute - -echo. -echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. - -goto fail - -:findJavaFromJavaHome -set JAVA_HOME=%JAVA_HOME:"=% -set JAVA_EXE=%JAVA_HOME%/bin/java.exe - -if exist "%JAVA_EXE%" goto execute - -echo. -echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. - -goto fail - -:execute -@rem Setup the command line - -set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar - - -@rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* - -:end -@rem End local scope for the variables with windows NT shell -if %ERRORLEVEL% equ 0 goto mainEnd - -:fail -rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of -rem the _cmd.exe /c_ return code! -set EXIT_CODE=%ERRORLEVEL% -if %EXIT_CODE% equ 0 set EXIT_CODE=1 -if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% -exit /b %EXIT_CODE% - -:mainEnd -if "%OS%"=="Windows_NT" endlocal - -:omega diff --git a/packages/es-ingest/settings.gradle b/packages/es-ingest/settings.gradle deleted file mode 100644 index aaa4109937..0000000000 --- a/packages/es-ingest/settings.gradle +++ /dev/null @@ -1 +0,0 @@ -rootProject.name = 'terarium' diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ElasticIngestApplication.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ElasticIngestApplication.java deleted file mode 100644 index f99427c838..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ElasticIngestApplication.java +++ /dev/null @@ -1,69 +0,0 @@ -package software.uncharted.terarium.esingest; - -import java.lang.reflect.Constructor; -import java.util.ArrayList; -import java.util.List; - -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.ApplicationRunner; -import org.springframework.boot.SpringApplication; -import org.springframework.boot.autoconfigure.SpringBootApplication; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.PropertySource; - -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.configuration.Config; -import software.uncharted.terarium.esingest.ingests.IElasticIngest; -import software.uncharted.terarium.esingest.service.ElasticIngestParams; -import software.uncharted.terarium.esingest.service.ElasticIngestService; - -@SpringBootApplication -@Slf4j -@PropertySource("classpath:application.properties") -public class ElasticIngestApplication { - - @Autowired - ElasticIngestService esIngestService; - - @Autowired - Config config; - - public static void main(String[] args) { - SpringApplication.run(ElasticIngestApplication.class, args); - } - - @Bean - public ApplicationRunner applicationRunner() { - return args -> { - - if (config.getIngestParams().size() == 0) { - log.error("No ingest parameters configured. Exiting..."); - System.exit(1); - } - - List ingests = new ArrayList<>(); - - for (ElasticIngestParams params : config.getIngestParams()) { - log.info("Loading ingest class: {}", params.getIngestClass()); - - Class ingestClass = Class.forName(params.getIngestClass()); - Constructor constructor = ingestClass.getConstructor(); - IElasticIngest ingest = (IElasticIngest) constructor.newInstance(); - ingests.add(ingest); - } - - for (int i = 0; i < config.getIngestParams().size(); i++) { - - ElasticIngestParams params = config.getIngestParams().get(i); - IElasticIngest ingest = ingests.get(i); - - esIngestService.ingest(params, ingest); - } - - esIngestService.shutdown(); - - log.info("Shutting down the application..."); - System.exit(0); - }; - } -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/configuration/Config.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/configuration/Config.java deleted file mode 100644 index 54358e7a8d..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/configuration/Config.java +++ /dev/null @@ -1,56 +0,0 @@ -package software.uncharted.terarium.esingest.configuration; - -import java.util.List; - -import org.springframework.boot.context.properties.ConfigurationProperties; -import org.springframework.context.annotation.Configuration; - -import lombok.Data; -import lombok.NoArgsConstructor; -import lombok.experimental.Accessors; -import software.uncharted.terarium.esingest.service.ElasticIngestParams; - -@Configuration -@ConfigurationProperties(prefix = "terarium") -@Data -@Accessors(chain = true) -@NoArgsConstructor -public class Config { - - /** - * Ingests configured for the app. - */ - List ingestParams; - - /** - * Amazon configuration - */ - Amazon amazon; - - /* - * S3 Storage related config - */ - String fileStorageS3BucketName; - - @Data - @Accessors(chain = true) - public static class AmazonCredentials { - String accessKey; - String secretKey; - } - - @Data - @Accessors(chain = true) - public static class AmazonS3 { - String region; - String url; - String credentialsId; - } - - @Data - @Accessors(chain = true) - public static class Amazon { - AmazonCredentials credential; - AmazonS3 s3; - } -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/configuration/ConfigGetter.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/configuration/ConfigGetter.java deleted file mode 100644 index f8928035f1..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/configuration/ConfigGetter.java +++ /dev/null @@ -1,20 +0,0 @@ -package software.uncharted.terarium.esingest.configuration; - -import org.springframework.stereotype.Component; - -import lombok.extern.slf4j.Slf4j; - -@Slf4j -@Component -public class ConfigGetter { - - private static Config config; - - public ConfigGetter(Config c) { - ConfigGetter.config = c; - } - - public static Config getConfig() { - return config; - } -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/configuration/ElasticsearchConfiguration.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/configuration/ElasticsearchConfiguration.java deleted file mode 100644 index 9dbcec7c2e..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/configuration/ElasticsearchConfiguration.java +++ /dev/null @@ -1,36 +0,0 @@ -package software.uncharted.terarium.esingest.configuration; - -import org.springframework.beans.factory.annotation.Value; -import org.springframework.boot.context.properties.ConfigurationProperties; -import org.springframework.context.annotation.Configuration; - -import lombok.Data; -import lombok.experimental.Accessors; - -@Configuration -@ConfigurationProperties(prefix = "terarium.elasticsearch") -@Data -@Accessors(chain = true) -public class ElasticsearchConfiguration { - String url; - - @Value("${terarium.elasticsearch.auth_enabled:false}") - boolean authEnabled; - - String username; - - String password; - - Index index; - - public record Index( - String prefix, - String suffix, - String searchableDocumentRoot, - String searchableModelRoot) { - } - - public String getIndex(String root) { - return String.join("_", index.prefix, root, index.suffix); - } -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/IElasticIngest.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/IElasticIngest.java deleted file mode 100644 index eecda00694..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/IElasticIngest.java +++ /dev/null @@ -1,16 +0,0 @@ -package software.uncharted.terarium.esingest.ingests; - -import java.util.List; - -import software.uncharted.terarium.esingest.models.input.IInputDocument; -import software.uncharted.terarium.esingest.models.output.IOutputDocument; -import software.uncharted.terarium.esingest.service.ElasticIngestParams; - -public interface IElasticIngest { - - public void setup(final ElasticIngestParams params); - - public void teardown(final ElasticIngestParams params); - - public List> getPasses(); -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/IElasticPass.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/IElasticPass.java deleted file mode 100644 index daeb8d9239..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/IElasticPass.java +++ /dev/null @@ -1,22 +0,0 @@ -package software.uncharted.terarium.esingest.ingests; - -import java.io.IOException; -import java.util.List; - -import software.uncharted.terarium.esingest.iterators.IInputIterator; -import software.uncharted.terarium.esingest.models.input.IInputDocument; -import software.uncharted.terarium.esingest.models.output.IOutputDocument; -import software.uncharted.terarium.esingest.service.ElasticIngestParams; - -public interface IElasticPass { - - public void setup(final ElasticIngestParams params); - - public void teardown(final ElasticIngestParams params); - - public String message(); - - public IInputIterator getIterator(final ElasticIngestParams params) throws IOException; - - public List process(List input); -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/climate/DocumentAddEmbeddingsPass.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/climate/DocumentAddEmbeddingsPass.java deleted file mode 100644 index 5cf56066bc..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/climate/DocumentAddEmbeddingsPass.java +++ /dev/null @@ -1,111 +0,0 @@ -package software.uncharted.terarium.esingest.ingests.climate; - -import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; -import java.util.UUID; - -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.ingests.IElasticPass; -import software.uncharted.terarium.esingest.iterators.IInputIterator; -import software.uncharted.terarium.esingest.iterators.JSONLineIterator; -import software.uncharted.terarium.esingest.models.input.climate.DocumentEmbedding; -import software.uncharted.terarium.esingest.models.output.Embedding; -import software.uncharted.terarium.esingest.models.output.document.Document; -import software.uncharted.terarium.esingest.service.ElasticIngestParams; -import software.uncharted.terarium.esingest.util.ConcurrentBiMap; - -@Slf4j -public class DocumentAddEmbeddingsPass - implements IElasticPass { - - final String EMBEDDING_PATH = "embeddings"; - ConcurrentBiMap uuidLookup; - - DocumentAddEmbeddingsPass(ConcurrentBiMap uuidLookup) { - this.uuidLookup = uuidLookup; - } - - public void setup(final ElasticIngestParams params) { - } - - public void teardown(final ElasticIngestParams params) { - } - - public String message() { - return "Inserting embeddings"; - } - - public IInputIterator getIterator(final ElasticIngestParams params) throws IOException { - Path embeddingsPath = Paths.get(params.getInputDir()).resolve(EMBEDDING_PATH); - - return new JSONLineIterator<>(embeddingsPath, DocumentEmbedding.class, - - // NOTE: we want to upload _all_ embedding chunks in a single payload, so we - // need to ensure that when a worker receives the embeddings, it has all the - // embeddings for a single document and it is not split between workers. - - (List batch, DocumentEmbedding latestToAdd) -> { - // if we are under the batch size, don't chunk - if (batch.size() < params.getBatchSize()) { - return false; - } - - // if we are over, only split if the newest item is for a different doc - - DocumentEmbedding last = batch.get(batch.size() - 1); - - // do not chunk unless we have different doc ids - return !last.getId().equals(latestToAdd.getId()); - }); - } - - private Embedding process(DocumentEmbedding input) { - Embedding embedding = new Embedding(); - embedding.setEmbeddingId(input.getEmbeddingChunkId()); - embedding.setSpans(input.getSpans()); - embedding.setVector(input.getEmbedding()); - return embedding; - } - - public List process(List input) { - List output = new ArrayList<>(); - List embeddings = new ArrayList<>(); - UUID currentId = null; - - for (DocumentEmbedding in : input) { - Embedding embedding = process(in); - if (embedding == null) { - continue; - } - - if (!uuidLookup.containsKey(in.getId())) { - // no amr for this model - continue; - } - - UUID uuid = uuidLookup.get(in.getId()); - - if (currentId == null) { - // create a new partial - currentId = uuid; - } else if (!currentId.equals(uuid)) { - // embedding references a new doc, add existing partial to output, create next - // one - Document doc = new Document(); - doc.setId(uuid); - doc.setEmbeddings(embeddings); - output.add(doc); - - currentId = uuid; - embeddings = new ArrayList<>(); - } - - embeddings.add(embedding); - } - return output; - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/climate/DocumentIngest.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/climate/DocumentIngest.java deleted file mode 100644 index be6a1dcfaf..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/climate/DocumentIngest.java +++ /dev/null @@ -1,30 +0,0 @@ -package software.uncharted.terarium.esingest.ingests.climate; - -import java.util.List; -import java.util.UUID; - -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.ingests.IElasticIngest; -import software.uncharted.terarium.esingest.ingests.IElasticPass; -import software.uncharted.terarium.esingest.service.ElasticIngestParams; -import software.uncharted.terarium.esingest.util.ConcurrentBiMap; - -@Slf4j -public class DocumentIngest implements IElasticIngest { - - ConcurrentBiMap uuidLookup = new ConcurrentBiMap<>(); - - public void setup(final ElasticIngestParams params) { - } - - public void teardown(final ElasticIngestParams params) { - } - - public List> getPasses() { - return List.of( - new DocumentInsertSourcePass(uuidLookup), - new DocumentAddEmbeddingsPass(uuidLookup)); - - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/climate/DocumentInsertSourcePass.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/climate/DocumentInsertSourcePass.java deleted file mode 100644 index 43e74d57e0..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/climate/DocumentInsertSourcePass.java +++ /dev/null @@ -1,90 +0,0 @@ -package software.uncharted.terarium.esingest.ingests.climate; - -import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; -import java.util.UUID; - -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.configuration.Config; -import software.uncharted.terarium.esingest.configuration.ConfigGetter; -import software.uncharted.terarium.esingest.ingests.IElasticPass; -import software.uncharted.terarium.esingest.iterators.IInputIterator; -import software.uncharted.terarium.esingest.iterators.JSONLineIterator; -import software.uncharted.terarium.esingest.models.input.climate.DocumentSource; -import software.uncharted.terarium.esingest.models.output.document.Document; -import software.uncharted.terarium.esingest.service.ElasticIngestParams; -import software.uncharted.terarium.esingest.service.s3.S3Service; -import software.uncharted.terarium.esingest.util.ConcurrentBiMap; -import software.uncharted.terarium.esingest.util.UUIDUtil; - -@Slf4j -public class DocumentInsertSourcePass - implements IElasticPass { - - final String DOCUMENT_PATH = "documents"; - ConcurrentBiMap uuidLookup; - - DocumentInsertSourcePass(ConcurrentBiMap uuidLookup) { - this.uuidLookup = uuidLookup; - } - - public void setup(final ElasticIngestParams params) { - } - - public void teardown(final ElasticIngestParams params) { - } - - public String message() { - return "Inserting documents"; - } - - public IInputIterator getIterator(final ElasticIngestParams params) throws IOException { - Path documentPath = Paths.get(params.getInputDir()).resolve(DOCUMENT_PATH); - - return new JSONLineIterator<>(documentPath, DocumentSource.class, params.getBatchSize()); - } - - private String getPath(final UUID id, final String filename) { - return String.join("/", "document", id.toString(), filename); - } - - public List process(List input) { - - Config config = ConfigGetter.getConfig(); - S3Service s3Service = new S3Service(config.getAmazon()); - - List res = new ArrayList<>(); - for (DocumentSource in : input) { - - UUID uuid = UUIDUtil.generateSeededUUID(in.getId()); - if (uuidLookup.containsValue(uuid)) { - log.warn("Duplicate UUID generated for document: {}, generating non-deterministic id instead", - in.getId()); - uuid = UUID.randomUUID(); - } - uuidLookup.put(in.getId(), uuid); - - Document doc = new Document(); - doc.setId(uuid); - doc.setName(in.getSource().getTitle()); - doc.setDescription(in.getSource().getTitle()); - doc.setText(in.getSource().getContents()); - doc.setDoi(List.of(in.getSource().getDoi())); - - final String filename = "source.txt"; - doc.setFileNames(List.of(filename)); - - final String bucket = config.getFileStorageS3BucketName(); - final String key = getPath(uuid, filename); - - s3Service.putObject(bucket, key, in.getSource().getContents().getBytes()); - - res.add(doc); - } - return res; - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/epi/DocumentAddEmbeddingsPass.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/epi/DocumentAddEmbeddingsPass.java deleted file mode 100644 index c690bd0e30..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/epi/DocumentAddEmbeddingsPass.java +++ /dev/null @@ -1,100 +0,0 @@ -package software.uncharted.terarium.esingest.ingests.epi; - -import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; -import java.util.UUID; - -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.ingests.IElasticPass; -import software.uncharted.terarium.esingest.iterators.IInputIterator; -import software.uncharted.terarium.esingest.iterators.JSONLineIterator; -import software.uncharted.terarium.esingest.models.input.epi.DocumentEmbedding; -import software.uncharted.terarium.esingest.models.output.Embedding; -import software.uncharted.terarium.esingest.models.output.document.Document; -import software.uncharted.terarium.esingest.service.ElasticIngestParams; - -@Slf4j -public class DocumentAddEmbeddingsPass - implements IElasticPass { - - final String EMBEDDING_PATH = "embeddings"; - - public void setup(final ElasticIngestParams params) { - } - - public void teardown(final ElasticIngestParams params) { - } - - public String message() { - return "Inserting embeddings"; - } - - public IInputIterator getIterator(final ElasticIngestParams params) throws IOException { - Path embeddingsPath = Paths.get(params.getInputDir()).resolve(EMBEDDING_PATH); - - return new JSONLineIterator<>(embeddingsPath, DocumentEmbedding.class, - - // NOTE: we want to upload _all_ embedding chunks in a single payload, so we - // need to ensure that when a worker receives the embeddings, it has all the - // embeddings for a single document and it is not split between workers. - - (List batch, DocumentEmbedding latestToAdd) -> { - // if we are under the batch size, don't chunk - if (batch.size() < params.getBatchSize()) { - return false; - } - - // if we are over, only split if the newest item is for a different doc - - DocumentEmbedding last = batch.get(batch.size() - 1); - - // do not chunk unless we have different doc ids - return !last.getId().equals(latestToAdd.getId()); - }); - } - - private Embedding process(DocumentEmbedding input) { - Embedding embedding = new Embedding(); - embedding.setEmbeddingId(input.getEmbeddingChunkId()); - embedding.setSpans(input.getSpans()); - embedding.setVector(input.getEmbedding()); - return embedding; - } - - public List process(List input) { - List output = new ArrayList<>(); - List embeddings = new ArrayList<>(); - UUID currentId = null; - - for (DocumentEmbedding in : input) { - Embedding embedding = process(in); - if (embedding == null) { - continue; - } - - UUID uuid = UUID.fromString(in.getId()); - - if (currentId == null) { - // create a new partial - currentId = uuid; - } else if (!currentId.equals(uuid)) { - // embedding references a new doc, add existing partial to output, create next - // one - Document doc = new Document(); - doc.setId(uuid); - doc.setEmbeddings(embeddings); - output.add(doc); - - currentId = uuid; - embeddings = new ArrayList<>(); - } - - embeddings.add(embedding); - } - return output; - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/epi/DocumentIngest.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/epi/DocumentIngest.java deleted file mode 100644 index fd4ba60e3a..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/epi/DocumentIngest.java +++ /dev/null @@ -1,26 +0,0 @@ -package software.uncharted.terarium.esingest.ingests.epi; - -import java.util.List; - -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.ingests.IElasticIngest; -import software.uncharted.terarium.esingest.ingests.IElasticPass; -import software.uncharted.terarium.esingest.service.ElasticIngestParams; - -@Slf4j -public class DocumentIngest implements IElasticIngest { - - public void setup(final ElasticIngestParams params) { - } - - public void teardown(final ElasticIngestParams params) { - } - - public List> getPasses() { - return List.of( - new DocumentInsertSourcePass(), - new DocumentAddEmbeddingsPass()); - - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/epi/DocumentInsertSourcePass.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/epi/DocumentInsertSourcePass.java deleted file mode 100644 index d4c03e256c..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/epi/DocumentInsertSourcePass.java +++ /dev/null @@ -1,77 +0,0 @@ -package software.uncharted.terarium.esingest.ingests.epi; - -import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; -import java.util.UUID; - -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.configuration.Config; -import software.uncharted.terarium.esingest.configuration.ConfigGetter; -import software.uncharted.terarium.esingest.ingests.IElasticPass; -import software.uncharted.terarium.esingest.iterators.IInputIterator; -import software.uncharted.terarium.esingest.iterators.JSONLineIterator; -import software.uncharted.terarium.esingest.models.input.epi.DocumentSource; -import software.uncharted.terarium.esingest.models.output.document.Document; -import software.uncharted.terarium.esingest.service.ElasticIngestParams; -import software.uncharted.terarium.esingest.service.s3.S3Service; - -@Slf4j -public class DocumentInsertSourcePass - implements IElasticPass { - - final String DOCUMENT_PATH = "documents"; - - public void setup(final ElasticIngestParams params) { - } - - public void teardown(final ElasticIngestParams params) { - } - - public String message() { - return "Inserting documents"; - } - - public IInputIterator getIterator(final ElasticIngestParams params) throws IOException { - Path documentPath = Paths.get(params.getInputDir()).resolve(DOCUMENT_PATH); - - return new JSONLineIterator<>(documentPath, DocumentSource.class, params.getBatchSize()); - } - - private String getPath(final UUID id, final String filename) { - return String.join("/", "document", id.toString(), filename); - } - - public List process(List input) { - - Config config = ConfigGetter.getConfig(); - S3Service s3Service = new S3Service(config.getAmazon()); - - List res = new ArrayList<>(); - for (DocumentSource in : input) { - - UUID id = UUID.fromString(in.getId()); - - Document doc = new Document(); - doc.setId(id); - doc.setName(in.getSource().getTitle()); - doc.setDescription(in.getSource().getTitle()); - doc.setText(in.getSource().getBody()); - doc.setDoi(in.getSource().getFeature().getDoi()); - - final String filename = "source.txt"; - doc.setFileNames(List.of(filename)); - - final String bucket = config.getFileStorageS3BucketName(); - final String key = getPath(id, filename); - - s3Service.putObject(bucket, key, in.getSource().getBody().getBytes()); - - res.add(doc); - } - return res; - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/model/ModelAddEmbeddingsPass.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/model/ModelAddEmbeddingsPass.java deleted file mode 100644 index 43a4b6cfb1..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/model/ModelAddEmbeddingsPass.java +++ /dev/null @@ -1,73 +0,0 @@ -package software.uncharted.terarium.esingest.ingests.model; - -import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; -import java.util.UUID; - -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.ingests.IElasticPass; -import software.uncharted.terarium.esingest.iterators.IInputIterator; -import software.uncharted.terarium.esingest.iterators.JSONLineIterator; -import software.uncharted.terarium.esingest.models.input.model.ModelEmbedding; -import software.uncharted.terarium.esingest.models.output.Embedding; -import software.uncharted.terarium.esingest.models.output.model.Model; -import software.uncharted.terarium.esingest.service.ElasticIngestParams; -import software.uncharted.terarium.esingest.util.ConcurrentBiMap; - -@Slf4j -public class ModelAddEmbeddingsPass - implements IElasticPass { - - final String EMBEDDING_PATH = "embeddings"; - - ConcurrentBiMap uuidLookup; - - ModelAddEmbeddingsPass(ConcurrentBiMap uuidLookup) { - this.uuidLookup = uuidLookup; - } - - public void setup(final ElasticIngestParams params) { - } - - public void teardown(final ElasticIngestParams params) { - } - - public String message() { - return "Inserting model cards and embeddings"; - } - - public IInputIterator getIterator(final ElasticIngestParams params) throws IOException { - Path embeddingPath = Paths.get(params.getInputDir()).resolve(EMBEDDING_PATH); - - return new JSONLineIterator<>(embeddingPath, ModelEmbedding.class, params.getBatchSize()); - } - - public List process(List input) { - List res = new ArrayList<>(); - for (ModelEmbedding in : input) { - - if (!uuidLookup.containsKey(in.getId())) { - // no amr for this model - continue; - } - - UUID uuid = uuidLookup.get(in.getId()); - - Model doc = new Model(); - doc.setId(uuid); - doc.getMetadata().setGollmCard(in.getModelCard()); - - Embedding embedding = new Embedding(); - embedding.setEmbeddingId(UUID.randomUUID().toString()); - embedding.setVector(in.getEmbedding()); - - doc.setEmbeddings(List.of(embedding)); - res.add(doc); - } - return res; - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/model/ModelAddMetadataPass.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/model/ModelAddMetadataPass.java deleted file mode 100644 index 1cc7997515..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/model/ModelAddMetadataPass.java +++ /dev/null @@ -1,76 +0,0 @@ -package software.uncharted.terarium.esingest.ingests.model; - -import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; -import java.util.UUID; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.ingests.IElasticPass; -import software.uncharted.terarium.esingest.iterators.IInputIterator; -import software.uncharted.terarium.esingest.iterators.JSONKeyIterator; -import software.uncharted.terarium.esingest.models.input.model.ModelMetadata; -import software.uncharted.terarium.esingest.models.output.model.Model; -import software.uncharted.terarium.esingest.service.ElasticIngestParams; -import software.uncharted.terarium.esingest.util.ConcurrentBiMap; - -@Slf4j -public class ModelAddMetadataPass - implements IElasticPass { - - final ObjectMapper mapper = new ObjectMapper(); - final String MODEL_PATH = "models"; - - ConcurrentBiMap uuidLookup; - - ModelAddMetadataPass(ConcurrentBiMap uuidLookup) { - this.uuidLookup = uuidLookup; - } - - public void setup(final ElasticIngestParams params) { - } - - public void teardown(final ElasticIngestParams params) { - } - - public String message() { - return "Inserting metadata into models"; - } - - public IInputIterator getIterator(final ElasticIngestParams params) throws IOException { - Path modelPath = Paths.get(params.getInputDir()).resolve(MODEL_PATH); - - return new JSONKeyIterator<>(modelPath, ModelMetadata.class, params.getBatchSize()); - } - - public List process(List input) { - List res = new ArrayList<>(); - for (ModelMetadata in : input) { - - if (!uuidLookup.containsKey(in.getId())) { - // no amr for this model - continue; - } - - UUID uuid = uuidLookup.get(in.getId()); - - Model doc = new Model(); - doc.setId(uuid); - doc.getMetadata().setTitle(in.getPublicationMetadata().getTitle()); - doc.getMetadata().setDoi(in.getPublicationMetadata().getDoi()); - doc.getMetadata().setType(in.getPublicationMetadata().getType()); - doc.getMetadata().setIssn(in.getPublicationMetadata().getIssn()); - doc.getMetadata().setJournal(in.getPublicationMetadata().getJournal()); - doc.getMetadata().setPublisher(in.getPublicationMetadata().getPublisher()); - doc.getMetadata().setYear(in.getPublicationMetadata().getYear()); - doc.getMetadata().setAuthor(in.getPublicationMetadata().getAuthor()); - res.add(doc); - } - return res; - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/model/ModelIngest.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/model/ModelIngest.java deleted file mode 100644 index 2aaf100504..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/model/ModelIngest.java +++ /dev/null @@ -1,30 +0,0 @@ -package software.uncharted.terarium.esingest.ingests.model; - -import java.util.List; -import java.util.UUID; - -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.ingests.IElasticIngest; -import software.uncharted.terarium.esingest.ingests.IElasticPass; -import software.uncharted.terarium.esingest.service.ElasticIngestParams; -import software.uncharted.terarium.esingest.util.ConcurrentBiMap; - -@Slf4j -public class ModelIngest implements IElasticIngest { - - ConcurrentBiMap uuidLookup = new ConcurrentBiMap<>(); - - public void setup(ElasticIngestParams params) { - } - - public void teardown(ElasticIngestParams params) { - } - - public List> getPasses() { - return List.of( - new ModelInsertAMRPass(uuidLookup), - new ModelAddEmbeddingsPass(uuidLookup), - new ModelAddMetadataPass(uuidLookup)); - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/model/ModelInsertAMRPass.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/model/ModelInsertAMRPass.java deleted file mode 100644 index 0b0515e7b9..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/ingests/model/ModelInsertAMRPass.java +++ /dev/null @@ -1,78 +0,0 @@ -package software.uncharted.terarium.esingest.ingests.model; - -import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; -import java.util.UUID; - -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.ingests.IElasticPass; -import software.uncharted.terarium.esingest.iterators.IInputIterator; -import software.uncharted.terarium.esingest.iterators.JSONFileIterator; -import software.uncharted.terarium.esingest.models.input.model.ModelAMR; -import software.uncharted.terarium.esingest.models.output.model.Model; -import software.uncharted.terarium.esingest.service.ElasticIngestParams; -import software.uncharted.terarium.esingest.util.ConcurrentBiMap; -import software.uncharted.terarium.esingest.util.UUIDUtil; - -@Slf4j -public class ModelInsertAMRPass - implements IElasticPass { - - final String AMR_PATH = "amr"; - - ConcurrentBiMap uuidLookup; - - ModelInsertAMRPass(ConcurrentBiMap uuidLookup) { - this.uuidLookup = uuidLookup; - } - - public void setup(final ElasticIngestParams params) { - } - - public void teardown(final ElasticIngestParams params) { - } - - public String message() { - return "Inserting AMRs into models"; - } - - public IInputIterator getIterator(final ElasticIngestParams params) throws IOException { - Path amrPath = Paths.get(params.getInputDir()).resolve(AMR_PATH); - - return new JSONFileIterator<>(amrPath, ModelAMR.class, params.getBatchSize()); - } - - public List process(List input) { - List res = new ArrayList<>(); - for (ModelAMR in : input) { - - if (in.getHeader() == null || in.getModel() == null) { - // no model for this amr - continue; - } - - UUID uuid = UUIDUtil.generateSeededUUID(in.getId()); - if (uuidLookup.containsValue(uuid)) { - log.warn("Duplicate UUID generated for document: {}, generating non-deterministic id instead", - in.getId()); - uuid = UUID.randomUUID(); - } - uuidLookup.put(in.getId(), uuid); - - Model doc = new Model(); - doc.setId(uuid); - doc.setHeader(in.getHeader()); - doc.setModel(in.getModel()); - doc.setSemantics(in.getSemantics()); - if (in.getMetadata() != null) { - doc.getMetadata().setAnnotations(in.getMetadata().get("annotations")); - } - res.add(doc); - } - return res; - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/iterators/IInputIterator.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/iterators/IInputIterator.java deleted file mode 100644 index 67e8c8a883..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/iterators/IInputIterator.java +++ /dev/null @@ -1,12 +0,0 @@ -package software.uncharted.terarium.esingest.iterators; - -import java.io.IOException; -import java.util.List; - -import software.uncharted.terarium.esingest.models.input.IInputDocument; - -public interface IInputIterator { - - public List getNext() throws IOException; - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/iterators/JSONFileIterator.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/iterators/JSONFileIterator.java deleted file mode 100644 index 8e929656e9..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/iterators/JSONFileIterator.java +++ /dev/null @@ -1,99 +0,0 @@ -package software.uncharted.terarium.esingest.iterators; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import java.util.Queue; -import java.util.function.BiFunction; - -import com.fasterxml.jackson.core.json.JsonReadFeature; -import com.fasterxml.jackson.databind.ObjectMapper; - -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.models.input.IInputDocument; -import software.uncharted.terarium.esingest.util.FileUtil; - -@Slf4j -public class JSONFileIterator implements IInputIterator { - - ObjectMapper mapper; - Queue files; - long batchSize; - BiFunction, T, Boolean> batcher; - List results = new ArrayList<>(); - Class classType; - - public JSONFileIterator(Path inputPath, Class classType, long batchSize) throws IOException { - this.batchSize = batchSize; - this.classType = classType; - this.files = new LinkedList<>(FileUtil.getJsonFiles(inputPath)); - if (files.isEmpty()) { - throw new IOException("No input files found for path: " + inputPath.toString()); - } - this.mapper = new ObjectMapper(); - this.mapper.enable(JsonReadFeature.ALLOW_NON_NUMERIC_NUMBERS.mappedFeature()); - } - - public JSONFileIterator(Path inputPath, Class classType, BiFunction, T, Boolean> batcher) - throws IOException { - this.batcher = batcher; - this.classType = classType; - this.files = new LinkedList<>(FileUtil.getJSONLineFilesInDir(inputPath)); - if (files.isEmpty()) { - throw new IOException("No input files found for path: " + inputPath.toString()); - } - this.mapper = new ObjectMapper(); - this.mapper.enable(JsonReadFeature.ALLOW_NON_NUMERIC_NUMBERS.mappedFeature()); - } - - private List returnAndClearResults(T toAddAfterClear) { - if (results.isEmpty()) { - return null; // signal there is no more data - } - List ret = new ArrayList<>(results); - results = new ArrayList<>(); - if (toAddAfterClear != null) { - results.add(toAddAfterClear); - } - return ret; - } - - private List returnAndClearResults() { - return returnAndClearResults(null); - } - - public List getNext() throws IOException { - while (true) { - if (files.isEmpty()) { - // done - return returnAndClearResults(); - } - - Path file = files.poll(); - String content = Files.readString(file); - - T doc = mapper.readValue(content, classType); - if (doc.getId() == null || doc.getId().isEmpty()) { - doc.setId(FileUtil.getFilenameWithoutExtension(file.getFileName().toString())); - } - if (batcher != null) { - // check if we need to split the batch yet - - boolean splitBatch = batcher.apply(results, doc); - if (splitBatch) { - return returnAndClearResults(doc); - } - results.add(doc); - } else { - // static batch size - results.add(doc); - if (results.size() == batchSize) { - return returnAndClearResults(); - } - } - } - } -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/iterators/JSONKeyIterator.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/iterators/JSONKeyIterator.java deleted file mode 100644 index 7a448ed1d6..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/iterators/JSONKeyIterator.java +++ /dev/null @@ -1,89 +0,0 @@ -package software.uncharted.terarium.esingest.iterators; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Queue; - -import com.fasterxml.jackson.core.json.JsonReadFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.ObjectNode; - -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.models.input.IInputDocument; -import software.uncharted.terarium.esingest.util.FileUtil; - -@Slf4j -public class JSONKeyIterator implements IInputIterator { - - ObjectMapper mapper; - Queue files; - long batchSize; - Class classType; - ObjectNode currentNode; - - public JSONKeyIterator(Path inputPath, Class classType, long batchSize) throws IOException { - this.batchSize = batchSize; - this.classType = classType; - this.files = new LinkedList<>(FileUtil.getJsonFiles(inputPath)); - if (files.isEmpty()) { - throw new IOException("No input files found for path: " + inputPath.toString()); - } - this.mapper = new ObjectMapper(); - this.mapper.enable(JsonReadFeature.ALLOW_NON_NUMERIC_NUMBERS.mappedFeature()); - this.currentNode = (ObjectNode) this.mapper.readTree(Files.readString(files.poll())); - } - - public List getNext() throws IOException { - - List results = new ArrayList<>(); - - while (true) { - if (currentNode == null) { - // done - if (results.isEmpty()) { - return null; - } - return results; - } - - Iterator keys = currentNode.fieldNames(); - List keysToRemove = new ArrayList<>(); - - while (keys.hasNext()) { - String key = keys.next(); - - T doc = mapper.readValue(currentNode.get(key).toString(), classType); - if (doc.getId() == null || doc.getId().isEmpty()) { - doc.setId(key); - } - results.add(doc); - - if (results.size() == batchSize) { - // done this batch - - // remove the keys we have processed - for (String k : keysToRemove) { - currentNode.remove(k); - } - return results; - } - - // If you want to remove this key, add it to keysToRemove - keysToRemove.add(key); - } - - // we are done this node - this.currentNode = null; - - // load next node - if (!files.isEmpty()) { - this.currentNode = (ObjectNode) mapper.readTree(Files.readString(files.poll())); - } - } - } -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/iterators/JSONLineIterator.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/iterators/JSONLineIterator.java deleted file mode 100644 index 0921b422a5..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/iterators/JSONLineIterator.java +++ /dev/null @@ -1,108 +0,0 @@ -package software.uncharted.terarium.esingest.iterators; - -import java.io.BufferedReader; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import java.util.Queue; -import java.util.function.BiFunction; - -import com.fasterxml.jackson.core.json.JsonReadFeature; -import com.fasterxml.jackson.databind.ObjectMapper; - -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.models.input.IInputDocument; -import software.uncharted.terarium.esingest.util.FileUtil; - -@Slf4j -public class JSONLineIterator implements IInputIterator { - - ObjectMapper mapper; - Queue files; - BufferedReader reader; - long batchSize; - BiFunction, T, Boolean> batcher; - List results = new ArrayList<>(); - Class classType; - - public JSONLineIterator(Path inputPath, Class classType, long batchSize) throws IOException { - this.batchSize = batchSize; - this.classType = classType; - this.files = new LinkedList<>(FileUtil.getJSONLineFilesInDir(inputPath)); - if (files.isEmpty()) { - throw new IOException("No input files found for path: " + inputPath.toString()); - } - this.reader = Files.newBufferedReader(files.poll()); - this.mapper = new ObjectMapper(); - this.mapper.enable(JsonReadFeature.ALLOW_NON_NUMERIC_NUMBERS.mappedFeature()); - } - - public JSONLineIterator(Path inputPath, Class classType, BiFunction, T, Boolean> batcher) - throws IOException { - this.classType = classType; - this.batcher = batcher; - this.files = new LinkedList<>(FileUtil.getJSONLineFilesInDir(inputPath)); - if (files.isEmpty()) { - throw new IOException("No input files found for path: " + inputPath.toString()); - } - this.reader = Files.newBufferedReader(files.poll()); - this.mapper = new ObjectMapper(); - this.mapper.enable(JsonReadFeature.ALLOW_NON_NUMERIC_NUMBERS.mappedFeature()); - } - - private List returnAndClearResults(T toAddAfterClear) { - if (results.isEmpty()) { - return null; // signal there is no more data - } - List ret = new ArrayList<>(results); - results = new ArrayList<>(); - if (toAddAfterClear != null) { - results.add(toAddAfterClear); - } - return ret; - } - - private List returnAndClearResults() { - return returnAndClearResults(null); - } - - public List getNext() throws IOException { - while (true) { - if (reader == null) { - // done - return returnAndClearResults(); - } - - for (String line; (line = reader.readLine()) != null;) { - T doc = mapper.readValue(line, this.classType); - if (batcher != null) { - // check if we need to split the batch yet - - boolean splitBatch = batcher.apply(results, doc); - if (splitBatch) { - return returnAndClearResults(doc); - } - results.add(doc); - } else { - // static batch size - results.add(doc); - if (results.size() == batchSize) { - return returnAndClearResults(); - } - } - } - - // done with this file - reader.close(); - reader = null; - - if (!files.isEmpty()) { - // go to next file - reader = Files.newBufferedReader(files.poll()); - } - } - } -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/IInputDocument.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/IInputDocument.java deleted file mode 100644 index 5d90b64791..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/IInputDocument.java +++ /dev/null @@ -1,9 +0,0 @@ -package software.uncharted.terarium.esingest.models.input; - -public interface IInputDocument { - - void setId(String id); - - String getId(); - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/climate/DocumentEmbedding.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/climate/DocumentEmbedding.java deleted file mode 100644 index 2eb8d56b98..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/climate/DocumentEmbedding.java +++ /dev/null @@ -1,23 +0,0 @@ -package software.uncharted.terarium.esingest.models.input.climate; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonProperty; - -import lombok.Data; -import software.uncharted.terarium.esingest.models.input.IInputDocument; - -@Data -@JsonIgnoreProperties(ignoreUnknown = true) -public class DocumentEmbedding implements IInputDocument { - - @JsonProperty("doc_id") - private String id; - - @JsonProperty("uuid") - private String embeddingChunkId; - - private long[] spans; - private String title; - private String doi; - private double[] embedding; -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/climate/DocumentSource.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/climate/DocumentSource.java deleted file mode 100644 index dcc9e89d64..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/climate/DocumentSource.java +++ /dev/null @@ -1,27 +0,0 @@ -package software.uncharted.terarium.esingest.models.input.climate; - -import com.fasterxml.jackson.annotation.JsonAlias; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; - -import lombok.Data; -import software.uncharted.terarium.esingest.models.input.IInputDocument; - -@Data -@JsonIgnoreProperties(ignoreUnknown = true) -public class DocumentSource implements IInputDocument { - - @Data - @JsonIgnoreProperties(ignoreUnknown = true) - static public class Source { - - private String title; - private String contents; - private String doi; - } - - @JsonAlias("_id") - String id; - - @JsonAlias("_source") - Source source; -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/epi/DocumentEmbedding.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/epi/DocumentEmbedding.java deleted file mode 100644 index 088c6e2a1f..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/epi/DocumentEmbedding.java +++ /dev/null @@ -1,25 +0,0 @@ -package software.uncharted.terarium.esingest.models.input.epi; - -import java.util.List; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonProperty; - -import lombok.Data; -import software.uncharted.terarium.esingest.models.input.IInputDocument; - -@Data -@JsonIgnoreProperties(ignoreUnknown = true) -public class DocumentEmbedding implements IInputDocument { - - @JsonProperty("doc_id") - private String id; - - @JsonProperty("uuid") - private String embeddingChunkId; - - private long[] spans; - private String title; - private List doi; - private double[] embedding; -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/epi/DocumentSource.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/epi/DocumentSource.java deleted file mode 100644 index 567ac05b6f..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/epi/DocumentSource.java +++ /dev/null @@ -1,51 +0,0 @@ -package software.uncharted.terarium.esingest.models.input.epi; - -import java.sql.Timestamp; -import java.util.List; - -import com.fasterxml.jackson.annotation.JsonAlias; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; - -import lombok.Data; -import software.uncharted.terarium.esingest.models.input.IInputDocument; - -@Data -@JsonIgnoreProperties(ignoreUnknown = true) -public class DocumentSource implements IInputDocument { - - @Data - @JsonIgnoreProperties(ignoreUnknown = true) - static public class Source { - private String title; - - private String body; - - private Feature feature; - } - - @Data - @JsonIgnoreProperties(ignoreUnknown = true) - static public class Feature { - private List date; - - private List website; - - private List doi; - - private List language; - - private List version; - - private List pubname; - - private List organization; - - private List name; - } - - @JsonAlias("_id") - String id; - - @JsonAlias("_source") - Source source; -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/model/ModelAMR.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/model/ModelAMR.java deleted file mode 100644 index 1a505d1f93..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/model/ModelAMR.java +++ /dev/null @@ -1,18 +0,0 @@ -package software.uncharted.terarium.esingest.models.input.model; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.databind.JsonNode; - -import lombok.Data; -import software.uncharted.terarium.esingest.models.input.IInputDocument; - -@Data -@JsonIgnoreProperties(ignoreUnknown = true) -public class ModelAMR implements IInputDocument { - - private String id; - private JsonNode header; - private JsonNode model; - private JsonNode semantics; - private JsonNode metadata; -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/model/ModelEmbedding.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/model/ModelEmbedding.java deleted file mode 100644 index cbe231c94c..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/model/ModelEmbedding.java +++ /dev/null @@ -1,18 +0,0 @@ -package software.uncharted.terarium.esingest.models.input.model; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.annotation.JsonDeserialize; - -import lombok.Data; -import software.uncharted.terarium.esingest.models.input.IInputDocument; - -@Data -@JsonIgnoreProperties(ignoreUnknown = true) -@JsonDeserialize(using = ModelEmbeddingDeserializer.class) -public class ModelEmbedding implements IInputDocument { - - private String id; - private double[] embedding; - private JsonNode modelCard; -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/model/ModelEmbeddingDeserializer.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/model/ModelEmbeddingDeserializer.java deleted file mode 100644 index 995946f03d..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/model/ModelEmbeddingDeserializer.java +++ /dev/null @@ -1,55 +0,0 @@ -package software.uncharted.terarium.esingest.models.input.model; - -import java.io.IOException; -import java.util.Iterator; - -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.JsonDeserializer; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; - -public class ModelEmbeddingDeserializer extends JsonDeserializer { - - @Override - public ModelEmbedding deserialize(JsonParser jsonParser, DeserializationContext deserializationContext) - throws IOException { - - ObjectMapper mapper = (ObjectMapper) jsonParser.getCodec(); - JsonNode node = mapper.readTree(jsonParser); - - ModelEmbedding embedding = new ModelEmbedding(); - - // The only top level key is the ID - Iterator fieldNames = node.fieldNames(); - - JsonNode body = null; - if (fieldNames.hasNext()) { - String id = fieldNames.next(); - // Use firstKey - embedding.setId(id); - - body = node.get(id); - } - - if (body == null) { - throw new IOException("Expected a top level key"); - } - - embedding.setModelCard(body.get("response")); - - JsonNode embeddingsNode = body.get("embedding"); - - if (!embeddingsNode.isArray()) { - throw new IOException("Expected an \"embedding\" array"); - } - - double[] embeddings = new double[embeddingsNode.size()]; - for (int i = 0; i < embeddingsNode.size(); i++) { - embeddings[i] = embeddingsNode.get(i).asDouble(); - } - embedding.setEmbedding(embeddings); - - return embedding; - } -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/model/ModelMetadata.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/model/ModelMetadata.java deleted file mode 100644 index 2273f50616..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/input/model/ModelMetadata.java +++ /dev/null @@ -1,40 +0,0 @@ -package software.uncharted.terarium.esingest.models.input.model; - -import java.util.List; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonProperty; - -import lombok.Data; -import software.uncharted.terarium.esingest.models.input.IInputDocument; - -@Data -@JsonIgnoreProperties(ignoreUnknown = true) -public class ModelMetadata implements IInputDocument { - - @Data - @JsonIgnoreProperties(ignoreUnknown = true) - static public class Author { - private String given; - private String family; - private String name; - }; - - @Data - @JsonIgnoreProperties(ignoreUnknown = true) - static public class PublicationMetadata { - private String title; - private String doi; - private String type; - private List issn; - private String journal; - private String publisher; - private String year; - private List author; - }; - - private String id; - - @JsonProperty("publication_metadata") - private PublicationMetadata publicationMetadata; -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/output/Embedding.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/output/Embedding.java deleted file mode 100644 index 91788a0fb0..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/output/Embedding.java +++ /dev/null @@ -1,12 +0,0 @@ -package software.uncharted.terarium.esingest.models.output; - -import lombok.Data; - -@Data -public class Embedding { - - private String embeddingId; - private double[] vector; - private long[] spans; - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/output/IOutputDocument.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/output/IOutputDocument.java deleted file mode 100644 index 52c391ba9f..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/output/IOutputDocument.java +++ /dev/null @@ -1,15 +0,0 @@ -package software.uncharted.terarium.esingest.models.output; - -import java.util.List; -import java.util.UUID; - -import com.fasterxml.jackson.annotation.JsonIgnore; - -public interface IOutputDocument { - - @JsonIgnore - UUID getId(); - - void addTopics(List topics); - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/output/document/Document.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/output/document/Document.java deleted file mode 100644 index 58f6cf2955..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/output/document/Document.java +++ /dev/null @@ -1,41 +0,0 @@ -package software.uncharted.terarium.esingest.models.output.document; - -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.List; -import java.util.UUID; - -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonInclude.Include; - -import lombok.Data; -import software.uncharted.terarium.esingest.models.output.Embedding; -import software.uncharted.terarium.esingest.models.output.IOutputDocument; - -@Data -@JsonInclude(Include.NON_EMPTY) -public class Document implements IOutputDocument { - - private UUID id; - private String name; - private String description; - private String text; - private List doi; - private List fileNames; - - private Timestamp createdOn = new Timestamp(System.currentTimeMillis()); - private Timestamp updatedOn = createdOn; - private Timestamp deletedOn = null; - private Boolean temporary = false; - private Boolean publicAsset = true; - private List embeddings; - private List topics; - - public void addTopics(List ts) { - if (topics == null) { - topics = new ArrayList<>(); - } - topics.addAll(ts); - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/output/model/Model.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/output/model/Model.java deleted file mode 100644 index b0f2113212..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/models/output/model/Model.java +++ /dev/null @@ -1,60 +0,0 @@ -package software.uncharted.terarium.esingest.models.output.model; - -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.List; -import java.util.UUID; - -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonInclude.Include; -import com.fasterxml.jackson.databind.JsonNode; - -import lombok.Data; -import software.uncharted.terarium.esingest.models.input.model.ModelMetadata.Author; -import software.uncharted.terarium.esingest.models.output.Embedding; -import software.uncharted.terarium.esingest.models.output.IOutputDocument; - -@Data -@JsonInclude(Include.NON_EMPTY) -public class Model implements IOutputDocument { - - @Data - @JsonInclude(Include.NON_EMPTY) - static public class Metadata { - private String title; - private String doi; - private String type; - private List issn; - private String journal; - private String publisher; - private String year; - private List author; - private Object gollmCard; - private JsonNode annotations; - } - - private UUID id; - - private JsonNode header; - private JsonNode model; - private JsonNode semantics; - private Metadata metadata = new Metadata(); - - private Timestamp createdOn = new Timestamp(System.currentTimeMillis()); - private Timestamp updatedOn = createdOn; - private Timestamp deletedOn = null; - - private Boolean temporary = false; - private Boolean publicAsset = true; - - private List embeddings; - private List topics; - - public void addTopics(List ts) { - if (topics == null) { - topics = new ArrayList<>(); - } - topics.addAll(ts); - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/ConcurrentWorkerService.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/ConcurrentWorkerService.java deleted file mode 100644 index 43302abb71..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/ConcurrentWorkerService.java +++ /dev/null @@ -1,114 +0,0 @@ -package software.uncharted.terarium.esingest.service; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.function.BiConsumer; - -import org.springframework.beans.factory.annotation.Value; -import org.springframework.stereotype.Service; - -import jakarta.annotation.PostConstruct; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.iterators.IInputIterator; -import software.uncharted.terarium.esingest.models.input.IInputDocument; - -@Service -@Slf4j -@RequiredArgsConstructor -public class ConcurrentWorkerService { - - @Value("${terarium.esingest.workerPoolSize:8}") - private int POOL_SIZE; - - @Value("${terarium.esingest.workTimeoutSeconds:60}") - private int WORK_TIMEOUT_SECONDS; - - private ExecutorService executor; - private List> futures = new ArrayList<>(); - private AtomicBoolean shouldStop = new AtomicBoolean(false); - - @PostConstruct - void init() { - executor = Executors.newFixedThreadPool(POOL_SIZE); - } - - protected void startWorkers(BlockingQueue> queue, - BiConsumer, Long> task) { - for (int i = 0; i < POOL_SIZE; i++) { - futures.add(executor.submit(() -> { - while (true) { - try { - long start = System.currentTimeMillis(); - List args = queue.take(); - if (args.size() == 0) { - break; - } - task.accept(args, System.currentTimeMillis() - start); - - } catch (Exception e) { - log.error("Error processing work", e); - shouldStop.set(true); - throw e; - } - } - return null; - })); - } - } - - protected void waitUntilWorkersAreDone(BlockingQueue> queue) - throws InterruptedException, ExecutionException { - - // now lets dispatch the worker kill signals (empty lists) - for (int i = 0; i < POOL_SIZE; i++) { - queue.offer(new ArrayList<>(), WORK_TIMEOUT_SECONDS, TimeUnit.SECONDS); - } - - // now we wait for them to finish - for (Future future : futures) { - try { - future.get(); - } catch (Exception e) { - log.error("Error waiting on workers to finish", e); - throw e; - } - } - - futures.clear(); - } - - protected void readWorkIntoQueue(BlockingQueue> queue, - IInputIterator iterator) - throws IOException, InterruptedException { - long lineCount = 0; - while (true) { - List next = iterator.getNext(); - if (next == null) { - // we are done - log.info("No more work for queue"); - break; - } - if (shouldStop.get()) { - throw new InterruptedException("Worker encountered an error, stopping ingest"); - } - // push work to queue - lineCount += next.size(); - log.info("Dispatching {} of {} total lines to work queue", next.size(), lineCount); - queue.offer(next, WORK_TIMEOUT_SECONDS, TimeUnit.SECONDS); - } - } - - public void shutdown() { - executor.shutdown(); - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/ElasticIngestParams.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/ElasticIngestParams.java deleted file mode 100644 index a03e8cefa7..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/ElasticIngestParams.java +++ /dev/null @@ -1,37 +0,0 @@ -package software.uncharted.terarium.esingest.service; - -import java.util.List; - -import lombok.Data; - -@Data -public class ElasticIngestParams { - - // Name of the ingest - String name; - - // The input directory. Ingest expects two child directories: - // - `{terarium.esingest.input-dir}/embeddings/` - // - `{terarium.esingest.input-dir}/documents/` - String inputDir; - - // The output index root to ingest into - String outputIndexRoot; - - // topics to add to each document - List topics; - - // The work queue size, determines how many documents / embeddings can queue up - // while workers are busy - int workQueueSize = 36; - - // The number of documents to fail to ingest before the entire ingest is failed. - int errorsThreshold = 10; - - // The number of documents to process in a single batch. - int batchSize = 200; - - // The classname used for the ingest. - String ingestClass; - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/ElasticIngestService.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/ElasticIngestService.java deleted file mode 100644 index 1db5768523..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/ElasticIngestService.java +++ /dev/null @@ -1,97 +0,0 @@ -package software.uncharted.terarium.esingest.service; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import java.util.Queue; -import java.util.concurrent.ExecutionException; - -import org.springframework.stereotype.Service; - -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.configuration.ElasticsearchConfiguration; -import software.uncharted.terarium.esingest.ingests.IElasticIngest; -import software.uncharted.terarium.esingest.ingests.IElasticPass; -import software.uncharted.terarium.esingest.models.input.IInputDocument; -import software.uncharted.terarium.esingest.models.output.IOutputDocument; -import software.uncharted.terarium.esingest.util.TimeFormatter; - -@Service -@Slf4j -@RequiredArgsConstructor -public class ElasticIngestService extends ConcurrentWorkerService { - - private final ElasticInsertService esInsertService; - private final ElasticUpdateService esUpdateService; - private final ElasticsearchConfiguration esConfig; - - public void ingest(ElasticIngestParams params, IElasticIngest ingest) - throws IOException, InterruptedException, ExecutionException { - - log.info("Running ingest: {}", params.getName()); - String indexName = esConfig.getIndex(params.getOutputIndexRoot()); - - try { - - // setup ingest - ingest.setup(params); - - Queue> passes = new LinkedList<>( - ingest.getPasses()); - - if (passes.isEmpty()) { - throw new RuntimeException("No passes found in ingest"); - } - - long start = System.currentTimeMillis(); - List errs = new ArrayList<>(); - - IElasticPass insertPass = passes.poll(); - - log.info("Ingesting documents from {} into {}", params.getInputDir(), indexName); - - long insertStart = System.currentTimeMillis(); - - insertPass.setup(params); - errs.addAll(esInsertService.insertDocuments(params, insertPass)); - insertPass.teardown(params); - - log.info("Ingested documents successfully in {}", - TimeFormatter.format(System.currentTimeMillis() - insertStart)); - - for (IElasticPass updatePass : passes) { - long updateStart = System.currentTimeMillis(); - updatePass.setup(params); - errs.addAll(esUpdateService.updateDocuments(params, updatePass)); - updatePass.teardown(params); - log.info("Updated documents successfully in {}", - TimeFormatter.format(System.currentTimeMillis() - updateStart)); - } - - // teardown ingest - ingest.setup(params); - - log.info( - "Ingest completed successfully in {}", - TimeFormatter.format(System.currentTimeMillis() - start)); - - if (errs.size() > 0) { - log.warn("Ingest encountered {} errors:", errs.size()); - for (String err : errs) { - log.error(err); - - } - } - - } catch (Exception e) { - log.error("Ingest failed", e); - } - } - - public void shutdown() { - esInsertService.shutdown(); - esUpdateService.shutdown(); - } -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/ElasticInsertService.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/ElasticInsertService.java deleted file mode 100644 index a6c1b2e6ac..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/ElasticInsertService.java +++ /dev/null @@ -1,86 +0,0 @@ -package software.uncharted.terarium.esingest.service; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.atomic.AtomicLong; - -import org.springframework.stereotype.Service; - -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.configuration.ElasticsearchConfiguration; -import software.uncharted.terarium.esingest.ingests.IElasticPass; -import software.uncharted.terarium.esingest.models.input.IInputDocument; -import software.uncharted.terarium.esingest.models.output.IOutputDocument; - -@Service -@Slf4j -@RequiredArgsConstructor -public class ElasticInsertService extends ConcurrentWorkerService { - - private final ElasticsearchService esService; - private final ElasticsearchConfiguration esConfig; - - public List insertDocuments( - ElasticIngestParams params, - IElasticPass ingest) - throws IOException, InterruptedException, ExecutionException { - - List errors = Collections.synchronizedList(new ArrayList<>()); - BlockingQueue> workQueue = new LinkedBlockingQueue<>(params.getWorkQueueSize()); - - AtomicLong lastTookMs = new AtomicLong(0); - - startWorkers(workQueue, (List items, Long timeWaitingOnQueue) -> { - try { - long start = System.currentTimeMillis(); - List output = ingest.process(items); - for (Output out : output) { - out.addTopics(params.getTopics()); - } - log.info("Processed {} docs in {}ms", output.size(), (System.currentTimeMillis() - start)); - - if (output.isEmpty()) { - log.warn("Batch had not processed output, skipping"); - return; - } - - long sinceLastTook = (System.currentTimeMillis() - start) + timeWaitingOnQueue; - long backpressureWait = lastTookMs.get() - sinceLastTook; - if (backpressureWait > 0) { - // apply backpressure - log.info("Backpressure applied, waiting for {}ms, took was {}ms", backpressureWait, - lastTookMs.get()); - Thread.sleep(backpressureWait); - } - - ElasticsearchService.BulkOpResponse res = esService - .bulkIndex(esConfig.getIndex(params.getOutputIndexRoot()), output); - if (res.getErrors().size() > 0) { - errors.addAll(res.getErrors()); - if (errors.size() > params.getErrorsThreshold()) { - for (String err : errors) { - log.error(err); - } - throw new InterruptedException("Too many errors, stopping ingest"); - } - } - lastTookMs.set(res.getTook()); - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - - readWorkIntoQueue(workQueue, ingest.getIterator(params)); - - waitUntilWorkersAreDone(workQueue); - - return errors; - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/ElasticUpdateService.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/ElasticUpdateService.java deleted file mode 100644 index 9f20c61786..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/ElasticUpdateService.java +++ /dev/null @@ -1,81 +0,0 @@ -package software.uncharted.terarium.esingest.service; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.atomic.AtomicLong; - -import org.springframework.stereotype.Service; - -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.configuration.ElasticsearchConfiguration; -import software.uncharted.terarium.esingest.ingests.IElasticPass; -import software.uncharted.terarium.esingest.models.input.IInputDocument; -import software.uncharted.terarium.esingest.models.output.IOutputDocument; - -@Service -@Slf4j -@RequiredArgsConstructor -public class ElasticUpdateService extends ConcurrentWorkerService { - - private final ElasticsearchService esService; - private final ElasticsearchConfiguration esConfig; - - public List updateDocuments( - ElasticIngestParams params, - IElasticPass ingest) - throws IOException, InterruptedException, ExecutionException { - - List errors = Collections.synchronizedList(new ArrayList<>()); - - BlockingQueue> workQueue = new LinkedBlockingQueue<>(params.getWorkQueueSize()); - - AtomicLong lastTookMs = new AtomicLong(0); - startWorkers(workQueue, (List items, Long timeWaitingOnQueue) -> { - try { - long start = System.currentTimeMillis(); - - List output = ingest.process(items); - if (output.isEmpty()) { - log.warn("Batch had not processed output, skipping"); - return; - } - - long sinceLastTook = (System.currentTimeMillis() - start) + timeWaitingOnQueue; - long backpressureWait = lastTookMs.get() - sinceLastTook; - if (backpressureWait > 0) { - // apply backpressure - Thread.sleep(backpressureWait); - } - - ElasticsearchService.BulkOpResponse res = esService - .bulkUpdate(esConfig.getIndex(params.getOutputIndexRoot()), output); - if (res.getErrors().size() > 0) { - errors.addAll(res.getErrors()); - if (errors.size() > params.getErrorsThreshold()) { - for (String err : errors) { - log.error(err); - } - throw new InterruptedException("Too many errors, stopping ingest"); - } - } - - lastTookMs.set(res.getTook()); - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - - readWorkIntoQueue(workQueue, ingest.getIterator(params)); - - waitUntilWorkersAreDone(workQueue); - - return errors; - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/ElasticsearchService.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/ElasticsearchService.java deleted file mode 100644 index 02100a0d2e..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/ElasticsearchService.java +++ /dev/null @@ -1,475 +0,0 @@ -package software.uncharted.terarium.esingest.service; - -import java.io.IOException; -import java.net.URI; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Base64; -import java.util.List; -import java.util.Map; - -import org.apache.http.Header; -import org.apache.http.HttpHost; -import org.apache.http.message.BasicHeader; -import org.elasticsearch.client.RestClient; -import org.elasticsearch.client.RestClientBuilder; -import org.springframework.boot.web.client.RestTemplateBuilder; -import org.springframework.http.HttpEntity; -import org.springframework.http.HttpHeaders; -import org.springframework.http.HttpMethod; -import org.springframework.http.MediaType; -import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Service; -import org.springframework.web.client.RestTemplate; - -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; - -import co.elastic.clients.elasticsearch.ElasticsearchClient; -import co.elastic.clients.elasticsearch._types.ErrorCause; -import co.elastic.clients.elasticsearch._types.Refresh; -import co.elastic.clients.elasticsearch.core.BulkRequest; -import co.elastic.clients.elasticsearch.core.BulkResponse; -import co.elastic.clients.elasticsearch.core.DeleteRequest; -import co.elastic.clients.elasticsearch.core.GetRequest; -import co.elastic.clients.elasticsearch.core.GetResponse; -import co.elastic.clients.elasticsearch.core.IndexRequest; -import co.elastic.clients.elasticsearch.core.SearchRequest; -import co.elastic.clients.elasticsearch.core.SearchResponse; -import co.elastic.clients.elasticsearch.core.bulk.BulkOperation; -import co.elastic.clients.elasticsearch.core.bulk.BulkResponseItem; -import co.elastic.clients.elasticsearch.core.bulk.UpdateOperation; -import co.elastic.clients.elasticsearch.core.search.Hit; -import co.elastic.clients.elasticsearch.core.search.SourceConfigParam; -import co.elastic.clients.elasticsearch.indices.CreateIndexRequest; -import co.elastic.clients.elasticsearch.indices.DeleteIndexRequest; -import co.elastic.clients.elasticsearch.indices.ExistsIndexTemplateRequest; -import co.elastic.clients.elasticsearch.indices.ExistsRequest; -import co.elastic.clients.elasticsearch.ingest.GetPipelineRequest; -import co.elastic.clients.json.JsonData; -import co.elastic.clients.json.jackson.JacksonJsonpMapper; -import co.elastic.clients.transport.ElasticsearchTransport; -import co.elastic.clients.transport.rest_client.RestClientTransport; -import jakarta.annotation.PostConstruct; -import lombok.Data; -import lombok.extern.slf4j.Slf4j; -import software.uncharted.terarium.esingest.configuration.ElasticsearchConfiguration; -import software.uncharted.terarium.esingest.models.output.IOutputDocument; - -@Service -@Data -@Slf4j -public class ElasticsearchService { - - private final ObjectMapper mapper; - - private final RestTemplateBuilder restTemplateBuilder; - - private RestTemplate restTemplate; - - private ElasticsearchClient client = null; - - private final ElasticsearchConfiguration config; - - protected RestTemplate getRestTemplate() { - if (restTemplate == null) { - initRestTemplate(); - } - return restTemplate; - } - - private void initRestTemplate() { - RestTemplateBuilder builder = getRestTemplateBuilder(); - if (config.isAuthEnabled()) { - builder = builder.basicAuthentication(config.getUsername(), config.getPassword()); - } - this.restTemplate = builder.build(); - } - - @PostConstruct - public void init() { - log.info("Connecting elasticsearch client to: {}", config.getUrl()); - - final RestClientBuilder httpClientBuilder = RestClient.builder( - HttpHost.create(config.getUrl())); - - if (config.isAuthEnabled()) { - String auth = config.getUsername() + ":" + config.getPassword(); - String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8)); - Header header = new BasicHeader("Authorization", "Basic " + encodedAuth); - - httpClientBuilder.setDefaultHeaders(new Header[] { header }); - } - - final RestClient httpClient = httpClientBuilder.build(); - - // Now you can create an ElasticsearchTransport object using the RestClient - final ElasticsearchTransport transport = new RestClientTransport(httpClient, new JacksonJsonpMapper(mapper)); - - client = new ElasticsearchClient(transport); - - try { - client.ping(); - } catch (final IOException e) { - log.error("Unable to ping Elasticsearch Rest Client", e); - } - } - - /** - * Create all indices that are not already present in the cluster - * - * @return True if the index exists, false otherwise - */ - public boolean containsIndex(final String indexName) throws IOException { - return client.indices().exists(ExistsRequest.of(e -> e.index(indexName))).value(); - } - - /** - * Check for the existence of a document in an index by id. - * - * @return True if the index exists, false otherwise - */ - public boolean contains(final String indexName, final String id) throws IOException { - final GetRequest req = new GetRequest.Builder() - .index(indexName) - .id(id) - .source(new SourceConfigParam.Builder().fetch(false).build()) - .build(); - - GetResponse response = client.get(req, JsonNode.class); - return response.found(); - } - - /** - * Create the provided index. - * - * @param index - * @throws IOException - */ - public void createIndex(final String index) throws IOException { - - final CreateIndexRequest req = new CreateIndexRequest.Builder().index(index).build(); - - client.indices().create(req); - } - - /** - * Create the provided index if it doesn't exist, if it does, delete it and - * re-create it. - * - * @param index - * @throws IOException - */ - public void createOrEnsureIndexIsEmpty(final String index) throws IOException { - if (containsIndex(index)) { - deleteIndex(index); - } - createIndex(index); - } - - /** - * Returns true if the ES cluster contains the index template with the provided - * name, false otherwise - * - * @param name The name of the index template to check existence for - * @return True if the index template is contained in the cluster, false - * otherwise - */ - public boolean containsIndexTemplate(final String name) throws IOException { - final ExistsIndexTemplateRequest req = new ExistsIndexTemplateRequest.Builder().name(name).build(); - - return client.indices().existsIndexTemplate(req).value(); - } - - /** - * Put an index template to the cluster - * - * @param name The name of the index template - * @param templateJson The index template json string - * @return True if the index template was successfully added, false otherwise - */ - public boolean putIndexTemplate(final String name, final String templateJson) { - return putTyped(name, templateJson, "index template", "_index_template"); - } - - /** - * Check if the cluster contains the pipeline with the provided id - * - * @param id The name of the pipeline to check existence for - * @return True if the pipeline is contained in the cluster, false otherwise - */ - public boolean containsPipeline(final String id) throws IOException { - final GetPipelineRequest req = new GetPipelineRequest.Builder().id(id).build(); - - return client.ingest().getPipeline(req).result().containsKey(id); - } - - /** - * Put a pipeline to the cluster - * - * @param name The name of the pipeline - * @param pipelineJson The pipeline json string - * @return True if the pipeline was successfully added, false otherwise - */ - public boolean putPipeline(final String name, final String pipelineJson) { - return putTyped(name, pipelineJson, "pipeline", "_ingest/pipeline"); - } - - /** - * Put a typed object to the cluster - * - * @param name The name of the object - * @param typedJson The object json string - * @param typeName The type of the object - * @param indexName The index to put the object in - * @return True if the object was successfully added, false otherwise - */ - private boolean putTyped(final String name, final String typedJson, final String typeName, final String indexName) { - log.info("Putting " + typeName + ": {}", name); - - try { - final HttpHeaders headers = new HttpHeaders(); - headers.setContentType(MediaType.APPLICATION_JSON); - final HttpEntity entity = new HttpEntity<>(typedJson, headers); - final ResponseEntity response = getRestTemplate().exchange( - new URI(config.getUrl() + "/" + indexName + "/" + name), - HttpMethod.PUT, entity, - JsonNode.class); - final JsonNode body = response.getBody(); - if (body != null) { - return body.at("/acknowledged").asBoolean(); - } - } catch (final Exception e) { - log.error("Error putting " + typeName + " {}", name, e); - } - return false; - } - - /** - * Search an index using a provided query (can be null for no query) - * - * @param The type of the document - * @param req - The search request - * @param tClass The class of the document - * @return A list of found documents. - */ - public List search(final SearchRequest req, final Class tClass) throws IOException { - log.info("Searching: {}", req.index()); - - final List docs = new ArrayList<>(); - final SearchResponse res = client.search(req, tClass); - for (final Hit hit : res.hits().hits()) { - docs.add(hit.source()); - } - return docs; - } - - /** - * Add a document to an index. - * - * @param The type of the document - * @param index The index to add the document to - * @param id The id of the document - * @param document The document to add - */ - public void index(final String index, final String id, final T document) throws IOException { - log.info("Indexing: {} into {}", id, index); - - final IndexRequest req = new IndexRequest.Builder() - .index(index) - .id(id) - .document(document) - .refresh(Refresh.WaitFor) - .build(); - - client.index(req); - } - - /** - * Remove a document from an index. - * - * @param index The index to remove the document from - * @param id The id of the document to remove - */ - public void delete(final String index, final String id) throws IOException { - log.info("Deleting: {} from {}", id, index); - - final DeleteRequest req = new DeleteRequest.Builder() - .index(index) - .id(id) - .refresh(Refresh.WaitFor) - .build(); - - client.delete(req); - } - - /** - * Remove an index. - * - * @param index The index to remove - */ - public void deleteIndex(final String index) throws IOException { - log.info("Deleting index: {}", index); - - DeleteIndexRequest deleteRequest = new DeleteIndexRequest.Builder() - .index(index) - .build(); - - client.indices().delete(deleteRequest); - } - - /** - * Get a single document by id. - * - * @param The type of the document - * @param index The index to get the document from - * @param id The id of the document to get - * @param tClass The class of the document - * @return The document if found, null otherwise - */ - public T get(final String index, final String id, final Class tClass) throws IOException { - log.info("Getting: {} from {}", id, index); - - final GetRequest req = new GetRequest.Builder() - .index(index) - .id(id) - .build(); - - final GetResponse res = client.get(req, tClass); - if (res.found()) { - return res.source(); - } - return null; - } - - @Data - static public class BulkOpResponse { - private List errors; - private long took; - } - - public BulkOpResponse bulkIndex(String index, List docs) - throws IOException { - BulkRequest.Builder bulkRequest = new BulkRequest.Builder(); - - for (Output doc : docs) { - if (doc.getId() == null) { - throw new RuntimeException("Document id cannot be null"); - } - bulkRequest.operations(op -> op - .index(idx -> idx - .index(index) - .id(doc.getId().toString()) - .document(doc))); - } - - BulkResponse bulkResponse = client.bulk(bulkRequest.build()); - - List errors = new ArrayList<>(); - if (bulkResponse.errors()) { - for (BulkResponseItem item : bulkResponse.items()) { - ErrorCause error = item.error(); - if (error != null) { - errors.add(error.reason()); - } - } - } - - BulkOpResponse r = new BulkOpResponse(); - r.setErrors(errors); - r.setTook(bulkResponse.took()); - return r; - } - - public BulkOpResponse bulkUpdate(String index, List docs) - throws IOException { - BulkRequest.Builder bulkRequest = new BulkRequest.Builder(); - - List operations = new ArrayList<>(); - for (Output doc : docs) { - if (doc.getId() == null) { - throw new RuntimeException("Document id cannot be null"); - } - UpdateOperation updateOperation = new UpdateOperation.Builder() - .index(index) - .id(doc.getId().toString()) - .action(a -> a.doc(doc)) - .build(); - - BulkOperation operation = new BulkOperation.Builder().update(updateOperation).build(); - operations.add(operation); - } - // Add the BulkOperation to the BulkRequest - bulkRequest.operations(operations); - - BulkResponse bulkResponse = client.bulk(bulkRequest.build()); - - List errors = new ArrayList<>(); - if (bulkResponse.errors()) { - for (BulkResponseItem item : bulkResponse.items()) { - ErrorCause error = item.error(); - if (error != null) { - String reason = error.reason(); - if (reason != null) { - errors.add(error.reason()); - } - } - } - } - - BulkOpResponse r = new BulkOpResponse(); - r.setErrors(errors); - r.setTook(bulkResponse.took()); - return r; - } - - @Data - static public class ScriptedUpdatedDoc { - String id; - Map params; - } - - public BulkOpResponse bulkScriptedUpdate(String index, String script, List docs) - throws IOException { - - BulkRequest.Builder bulkRequest = new BulkRequest.Builder(); - - List operations = new ArrayList<>(); - for (ScriptedUpdatedDoc doc : docs) { - BulkOperation operation = new BulkOperation.Builder().update(u -> u - .id(doc.getId()) - .index(index) - .retryOnConflict(10) - .action(action -> action - .script(s -> s - .inline(inlineScript -> inlineScript - .lang("painless") - .params(doc.getParams()) - .source(script))))) - .build(); - - operations.add(operation); - } - - // Add the BulkOperation to the BulkRequest - bulkRequest.operations(operations); - - BulkResponse bulkResponse = client.bulk(bulkRequest.build()); - - List errors = new ArrayList<>(); - if (bulkResponse.errors()) { - for (BulkResponseItem item : bulkResponse.items()) { - ErrorCause error = item.error(); - if (error != null) { - errors.add(error.reason()); - } - } - } - - BulkOpResponse r = new BulkOpResponse(); - r.setErrors(errors); - r.setTook(bulkResponse.took()); - return r; - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/s3/S3Service.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/s3/S3Service.java deleted file mode 100644 index a3eff0e792..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/service/s3/S3Service.java +++ /dev/null @@ -1,75 +0,0 @@ -package software.uncharted.terarium.esingest.service.s3; - -import java.net.URI; - -import lombok.extern.slf4j.Slf4j; -import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; -import software.amazon.awssdk.auth.credentials.AwsCredentials; -import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; -import software.amazon.awssdk.core.sync.RequestBody; -import software.amazon.awssdk.regions.Region; -import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.HeadObjectRequest; -import software.amazon.awssdk.services.s3.model.NoSuchKeyException; -import software.amazon.awssdk.services.s3.model.PutObjectRequest; -import software.amazon.awssdk.services.s3.model.PutObjectResponse; -import software.uncharted.terarium.esingest.configuration.Config; -import software.uncharted.terarium.esingest.configuration.Config.Amazon; - -@Slf4j -public class S3Service { - - private S3Client client; - - public S3Service(Amazon config) { - final Config.AmazonS3 s3Config = config.getS3(); - final Config.AmazonCredentials credentials = config.getCredential(); - final AwsCredentials creds = StaticCredentialsProvider - .create(AwsBasicCredentials.create(credentials.getAccessKey(), credentials.getSecretKey())) - .resolveCredentials(); - - if (s3Config.getUrl() != null && !s3Config.getUrl().isEmpty()) { - client = S3Client.builder() - .credentialsProvider(StaticCredentialsProvider.create(creds)) - .region(Region.of(s3Config.getRegion())) - .forcePathStyle(true) - .endpointOverride(URI.create(s3Config.getUrl())) - .build(); - } else { - client = S3Client.builder() - .credentialsProvider(StaticCredentialsProvider.create(creds)) - .region(Region.of(s3Config.getRegion())) - .build(); - } - } - - public void destroy() { - client.close(); - } - - public PutObjectResponse putObject(final String bucketName, final String key, final byte[] data) { - log.debug("Putting object {} in bucket {}", key, bucketName); - final PutObjectRequest request = PutObjectRequest.builder() - .bucket(bucketName) - .key(key) - .build(); - return client.putObject(request, RequestBody.fromBytes(data)); - } - - public boolean objectExists(final String bucketName, final String key) { - log.debug("Checking if object {} exists in bucket {}", key, bucketName); - final HeadObjectRequest request = HeadObjectRequest.builder() - .bucket(bucketName) - .key(key) - .build(); - try { - client.headObject(request); - log.debug("Object {} exists in bucket {}", key, bucketName); - return true; - } catch (NoSuchKeyException e) { - log.debug("Object {} does not exist in bucket {}", key, bucketName); - return false; - } - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/util/ConcurrentBiMap.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/util/ConcurrentBiMap.java deleted file mode 100644 index 1cc3ac8dd7..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/util/ConcurrentBiMap.java +++ /dev/null @@ -1,60 +0,0 @@ -package software.uncharted.terarium.esingest.util; - -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.locks.ReentrantLock; - -public class ConcurrentBiMap { - - private final ReentrantLock lock = new ReentrantLock(); - - ConcurrentMap keyToValue = new ConcurrentHashMap<>(); - ConcurrentMap valueToKey = new ConcurrentHashMap<>(); - - public boolean containsKey(K key) { - lock.lock(); - try { - return keyToValue.containsKey(key); - } finally { - lock.unlock(); - } - } - - public boolean containsValue(V val) { - lock.lock(); - try { - return valueToKey.containsKey(val); - } finally { - lock.unlock(); - } - } - - public void put(K k, V v) { - lock.lock(); - try { - keyToValue.put(k, v); - valueToKey.put(v, k); - } finally { - lock.unlock(); - } - } - - public V get(K k) { - lock.lock(); - try { - return keyToValue.get(k); - } finally { - lock.unlock(); - } - } - - public K getKey(V v) { - lock.lock(); - try { - return valueToKey.get(v); - } finally { - lock.unlock(); - } - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/util/FileUtil.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/util/FileUtil.java deleted file mode 100644 index f62062d024..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/util/FileUtil.java +++ /dev/null @@ -1,45 +0,0 @@ -package software.uncharted.terarium.esingest.util; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -public class FileUtil { - - public static String getFilenameWithoutExtension(String filename) { - int dotIndex = filename.lastIndexOf('.'); - if (dotIndex > 0) { - filename = filename.substring(0, dotIndex); - } - return filename; - } - - public static String getFilenameWithoutExtension(Path path) { - return getFilenameWithoutExtension(path.getFileName().toString()); - } - - public static List getJSONLineFilesInDir(Path dir) throws IOException { - try (Stream stream = Files.walk(dir)) { - List files = stream - .filter(Files::isRegularFile) - .filter(path -> path.toString().endsWith(".jsonl")) - .collect(Collectors.toCollection(LinkedList::new)); - return files; - } - } - - public static List getJsonFiles(Path dir) throws IOException { - List jsonFiles = new ArrayList<>(); - Files.walk(dir) - .filter(Files::isRegularFile) - .filter(path -> path.toString().endsWith(".json")) - .forEach(jsonFiles::add); - return jsonFiles; - } - -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/util/TimeFormatter.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/util/TimeFormatter.java deleted file mode 100644 index 8f25083aa1..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/util/TimeFormatter.java +++ /dev/null @@ -1,46 +0,0 @@ -package software.uncharted.terarium.esingest.util; - -import java.util.concurrent.TimeUnit; - -public class TimeFormatter { - - public static String format(long millis) { - if (millis < 0) { - throw new IllegalArgumentException("Duration must be greater than zero!"); - } - - long days = TimeUnit.MILLISECONDS.toDays(millis); - millis -= TimeUnit.DAYS.toMillis(days); - long hours = TimeUnit.MILLISECONDS.toHours(millis); - millis -= TimeUnit.HOURS.toMillis(hours); - long minutes = TimeUnit.MILLISECONDS.toMinutes(millis); - millis -= TimeUnit.MINUTES.toMillis(minutes); - long seconds = TimeUnit.MILLISECONDS.toSeconds(millis); - millis -= TimeUnit.MINUTES.toMillis(seconds); - - StringBuilder sb = new StringBuilder(64); - if (days > 0) { - sb.append(days); - sb.append("d "); - } - if (days > 0 || hours > 0) { - sb.append(hours); - sb.append("h "); - } - if (days > 0 || hours > 0 || minutes > 0) { - sb.append(minutes); - sb.append("m "); - } - if (days > 0 || hours > 0 || minutes > 0 || seconds > 0) { - sb.append(seconds); - sb.append("s "); - } - if (days > 0 || hours > 0 || minutes > 0 || seconds > 0 || millis > 0) { - sb.append(seconds); - sb.append("ms"); - } - - return (sb.toString()); - - } -} diff --git a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/util/UUIDUtil.java b/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/util/UUIDUtil.java deleted file mode 100644 index d1d4e41fe7..0000000000 --- a/packages/es-ingest/src/main/java/software/uncharted/terarium/esingest/util/UUIDUtil.java +++ /dev/null @@ -1,16 +0,0 @@ -package software.uncharted.terarium.esingest.util; - -import java.util.Random; -import java.util.UUID; - -public class UUIDUtil { - - static public UUID generateSeededUUID(String seed) { - Random random = new Random(seed.hashCode()); - - long mostSigBits = random.nextLong(); - long leastSigBits = random.nextLong(); - - return new UUID(mostSigBits, leastSigBits); - } -} diff --git a/packages/es-ingest/src/main/resources/application-ide.properties b/packages/es-ingest/src/main/resources/application-ide.properties deleted file mode 100644 index 22380cd529..0000000000 --- a/packages/es-ingest/src/main/resources/application-ide.properties +++ /dev/null @@ -1,9 +0,0 @@ -######################################################################################################################## -# Elasticsearch configuration -######################################################################################################################## -terarium.elasticsearch.url=http://localhost:9200 - -######################################################################################################################## -# aws credentials configuration -######################################################################################################################## -terarium.amazon.s3.url=http://localhost:9000 diff --git a/packages/es-ingest/src/main/resources/application-local.properties b/packages/es-ingest/src/main/resources/application-local.properties deleted file mode 100644 index d8262ef33f..0000000000 --- a/packages/es-ingest/src/main/resources/application-local.properties +++ /dev/null @@ -1,14 +0,0 @@ -######################################################################################################################## -# Elasticsearch configuration -######################################################################################################################## -terarium.elasticsearch.url=http://elasticsearch:9200 -terarium.elasticsearch.auth-enabled=false - -######################################################################################################################## -# aws credentials configuration -######################################################################################################################## -terarium.file-storage-s3-bucket-name=askem-local-storage -terarium.amazon.credential.access-key=admin -terarium.amazon.credential.secret-key=admin123 -terarium.amazon.s3.region=us-east-1 -terarium.amazon.s3.url=http://minio:9000 diff --git a/packages/es-ingest/src/main/resources/application-secrets.properties.encrypted b/packages/es-ingest/src/main/resources/application-secrets.properties.encrypted deleted file mode 100644 index 300c5580f8..0000000000 --- a/packages/es-ingest/src/main/resources/application-secrets.properties.encrypted +++ /dev/null @@ -1,14 +0,0 @@ -$ANSIBLE_VAULT;1.1;AES256 -36303039393239646339623163356531646235633235303737613937653937666132363064653439 -6331633162666165353232383237626363616364323336630a666562303766323462326564646232 -65386165306233303233653439366661666265616135666538376364333462303332346434383030 -3935646463356534650a623935623966346566383533363238343430306466366639653334623065 -38376162383762633837623032343631303630333963356261663230646130323233646136316137 -64666534666561653165386633396566636261313737393661353839343164376465323437666132 -38663433376435306361653465303237336264373032623736363763343237373566653233356134 -35666239396463353464336430396534636332353534396431363436383734316463346263333736 -34643835383237343536323932343932643039363836393639356165316634373438323833613265 -63373130613136333061333736363230663331353965393231643431326330663964663162653431 -64336462623934376434636363623461363333343061333038643131653030373937653061336665 -65636536613337323364333366663862663061646238383365346566623538373665303765613362 -3163 diff --git a/packages/es-ingest/src/main/resources/application.properties b/packages/es-ingest/src/main/resources/application.properties deleted file mode 100644 index 88a0fc3b96..0000000000 --- a/packages/es-ingest/src/main/resources/application.properties +++ /dev/null @@ -1,54 +0,0 @@ - -######################################################################################################################## -# Logging -######################################################################################################################## -logging.pattern.console=%d{yyyy-MM-dd HH:mm:ss} [%level] %msg [%c:%L]%n - -######################################################################################################################## -# Elasticsearch configuration -######################################################################################################################## -terarium.elasticsearch.url=https://elasticsearch.staging.terarium.ai:443 -terarium.elasticsearch.index.prefix=tds -terarium.elasticsearch.index.suffix=tera_2.0 -terarium.elasticsearch.auth-enabled=true -terarium.elasticsearch.username=${es-username} -terarium.elasticsearch.password=${es-password} - -######################################################################################################################## -# aws credentials configuration -######################################################################################################################## -aws-access-key-id=admin -aws-secret-access-key=admin123 -aws-url= -terarium.file-storage-s3-bucket-name=askem-staging-data-service -terarium.amazon.credential.access-key=${aws-access-key-id} -terarium.amazon.credential.secret-key=${aws-secret-access-key} -terarium.amazon.s3.region=us-east-1 -terarium.amazon.s3.url=${aws-url} - -######################################################################################################################## -# Ingest configuration -######################################################################################################################## -model-data-dir=/PATH/TO/MODELS -epi-data-dir=/PATH/TO/EPI -climate-data-dir=/PATH/TO/CLIMATE - -terarium.ingestParams[0].name="Model Dataset" -terarium.ingestParams[0].inputDir=${model-data-dir} -terarium.ingestParams[0].topics=model, amr -terarium.ingestParams[0].outputIndexRoot=model -terarium.ingestParams[0].ingestClass=software.uncharted.terarium.esingest.ingests.model.ModelIngest - -terarium.ingestParams[1].name="Opensource Epidemiology Dataset" -terarium.ingestParams[1].inputDir=${epi-data-dir} -terarium.ingestParams[1].topics=epi, epidemiology -terarium.ingestParams[1].outputIndexRoot=document -terarium.ingestParams[1].ingestClass=software.uncharted.terarium.esingest.ingests.epi.DocumentIngest -terarium.ingestParams[1].uploadAssetsToS3=true - -terarium.ingestParams[2].name="Climate Dataset" -terarium.ingestParams[2].inputDir=${climate-data-dir} -terarium.ingestParams[2].topics=climate -terarium.ingestParams[2].outputIndexRoot=document -terarium.ingestParams[2].ingestClass=software.uncharted.terarium.esingest.ingests.climate.DocumentIngest -terarium.ingestParams[2].uploadAssetsToS3=true diff --git a/packages/es-ingest/src/test/java/software/uncharted/terarium/esingest/ElasticIngestApplicationTests.java b/packages/es-ingest/src/test/java/software/uncharted/terarium/esingest/ElasticIngestApplicationTests.java deleted file mode 100644 index 6de64f35d8..0000000000 --- a/packages/es-ingest/src/test/java/software/uncharted/terarium/esingest/ElasticIngestApplicationTests.java +++ /dev/null @@ -1,15 +0,0 @@ -package software.uncharted.terarium.esingest; - -import org.junit.jupiter.api.Test; -import org.springframework.boot.test.context.SpringBootTest; -import org.springframework.test.context.ActiveProfiles; - -@SpringBootTest -@ActiveProfiles({ "local", "test" }) -public class ElasticIngestApplicationTests { - - @Test - void contextLoads() { - } - -} diff --git a/packages/es-ingest/src/test/resources/application-test.properties b/packages/es-ingest/src/test/resources/application-test.properties deleted file mode 100644 index 2877dae872..0000000000 --- a/packages/es-ingest/src/test/resources/application-test.properties +++ /dev/null @@ -1,14 +0,0 @@ -######################################################################################################################## -# Elasticsearch configuration -######################################################################################################################## -terarium.elasticsearch.url=http://localhost:9200 -terarium.elasticsearch.auth-enabled=false - -######################################################################################################################## -# aws credentials configuration -######################################################################################################################## -terarium.file-storage-s3-bucket-name=askem-local-storage -terarium.amazon.credential.access-key=admin -terarium.amazon.credential.secret-key=admin123 -terarium.amazon.s3.region=us-east-1 -terarium.amazon.s3.url=http://localhost:9000 diff --git a/settings.gradle b/settings.gradle index dd8bbce54a..ec2dc8ab0b 100644 --- a/settings.gradle +++ b/settings.gradle @@ -11,4 +11,3 @@ rootProject.name = "terarium" include ":packages:server" include ":packages:taskrunner" include ':packages:db-migration' -include ':packages:es-ingest'