diff --git a/src/main/java/gov/nasa/pds/registry/mgr/RegistryManagerCli.java b/src/main/java/gov/nasa/pds/registry/mgr/RegistryManagerCli.java index 1721e9f..df3dcc0 100644 --- a/src/main/java/gov/nasa/pds/registry/mgr/RegistryManagerCli.java +++ b/src/main/java/gov/nasa/pds/registry/mgr/RegistryManagerCli.java @@ -17,6 +17,7 @@ import gov.nasa.pds.registry.mgr.cmd.data.DeleteDataCmd; import gov.nasa.pds.registry.mgr.cmd.data.ExportFileCmd; import gov.nasa.pds.registry.mgr.cmd.data.SetArchiveStatusCmd; +import gov.nasa.pds.registry.mgr.cmd.data.UpdateAltIdsCmd; import gov.nasa.pds.registry.mgr.cmd.dd.DeleteDDCmd; import gov.nasa.pds.registry.mgr.cmd.dd.ExportDDCmd; import gov.nasa.pds.registry.mgr.cmd.dd.ListDDCmd; @@ -65,6 +66,7 @@ public static void printHelp() System.out.println(" delete-data Delete data from registry index"); System.out.println(" export-file Export a file from blob storage"); System.out.println(" set-archive-status Set product archive status"); + System.out.println(" update-alt-ids Update alternate IDs"); System.out.println(); System.out.println("Registry:"); @@ -244,9 +246,9 @@ private void initCommands() // Data commands.put("delete-data", new DeleteDataCmd()); - //commands.put("export-data", new ExportDataCmd()); commands.put("export-file", new ExportFileCmd()); commands.put("set-archive-status", new SetArchiveStatusCmd()); + commands.put("update-alt-ids", new UpdateAltIdsCmd()); } diff --git a/src/main/java/gov/nasa/pds/registry/mgr/cmd/data/UpdateAltIdsCmd.java b/src/main/java/gov/nasa/pds/registry/mgr/cmd/data/UpdateAltIdsCmd.java new file mode 100644 index 0000000..f384699 --- /dev/null +++ b/src/main/java/gov/nasa/pds/registry/mgr/cmd/data/UpdateAltIdsCmd.java @@ -0,0 +1,183 @@ +package gov.nasa.pds.registry.mgr.cmd.data; + + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.client.ResponseException; + +import gov.nasa.pds.registry.common.cfg.RegistryCfg; +import gov.nasa.pds.registry.common.es.client.EsUtils; +import gov.nasa.pds.registry.common.util.CloseUtils; +import gov.nasa.pds.registry.mgr.Constants; +import gov.nasa.pds.registry.mgr.cmd.CliCommand; +import gov.nasa.pds.registry.mgr.dao.RegistryDao; +import gov.nasa.pds.registry.mgr.dao.RegistryManager; + + +/** + * A CLI command to update product's alternate IDs in Elasticsearch. + * + * @author karpenko + */ +public class UpdateAltIdsCmd implements CliCommand +{ + private Logger log; + + /** + * Constructor + */ + public UpdateAltIdsCmd() + { + log = LogManager.getLogger(this.getClass()); + } + + + @Override + public void run(CommandLine cmdLine) throws Exception + { + if(cmdLine.hasOption("help")) + { + printHelp(); + return; + } + + RegistryCfg cfg = new RegistryCfg(); + cfg.url = cmdLine.getOptionValue("es", "http://localhost:9200"); + cfg.indexName = cmdLine.getOptionValue("index", Constants.DEFAULT_REGISTRY_INDEX); + cfg.authFile = cmdLine.getOptionValue("auth"); + + String filePath = cmdLine.getOptionValue("file"); + if(filePath == null) throw new Exception("Missing required parameter '-file'"); + File file = new File(filePath); + if(!file.exists()) throw new Exception("Input file doesn't exist: " + file.getAbsolutePath()); + + try + { + RegistryManager.init(cfg); + updateIds(file); + } + catch(ResponseException ex) + { + throw new Exception(EsUtils.extractErrorMessage(ex)); + } + finally + { + RegistryManager.destroy(); + } + } + + + private void updateIds(File file) throws Exception + { + BufferedReader rd = null; + + try + { + rd = new BufferedReader(new FileReader(file)); + + String line; + int lineNum = 0; + + while((line = rd.readLine()) != null) + { + lineNum++; + line = line.trim(); + + String[] ids = StringUtils.split(line, ",;\t "); + if(ids == null || ids.length != 2) + { + log.warn("Line " + lineNum + " has invalid value: [" + line + "]"); + continue; + } + + List newIds = new ArrayList(4); + + // First LIDVID value (old) + int idx = ids[0].indexOf("::"); + if(idx < 1) + { + log.warn("Line " + lineNum + " has invalid LIDVID: [" + ids[0] + "]"); + continue; + } + + // Add LIDVID + newIds.add(ids[0]); + // Add LID + newIds.add(ids[0].substring(0, idx)); + + // Second LIDVID value (new) + idx = ids[1].indexOf("::"); + if(idx < 1) + { + log.warn("Line " + lineNum + " has invalid LIDVID: [" + ids[1] + "]"); + continue; + } + + // Add LIDVID + newIds.add(ids[1]); + // Add LID + newIds.add(ids[1].substring(0, idx)); + + Map> idMap = new TreeMap<>(); + idMap.put(ids[0], newIds); + idMap.put(ids[1], newIds); + + log.info("Updating " + ids[0] + " -> " + ids[1]); + updateIds(idMap); + } + } + finally + { + CloseUtils.close(rd); + } + } + + + private void updateIds(Map> newIds) throws Exception + { + RegistryDao dao = RegistryManager.getInstance().getRegistryDao(); + + Map> existingIds = dao.getAlternateIds(newIds.keySet()); + if(existingIds == null || existingIds.isEmpty()) return; + + for(Map.Entry> entry: existingIds.entrySet()) + { + List additionalIds = newIds.get(entry.getKey()); + if(additionalIds != null) + { + entry.getValue().addAll(additionalIds); + } + } + + dao.updateAlternateIds(existingIds); + } + + + public void printHelp() + { + System.out.println("Usage: registry-manager update-alt-ids "); + + System.out.println(); + System.out.println("Update product's alternate IDs"); + System.out.println(); + System.out.println("Required parameters:"); + System.out.println(" -file CSV file with the list of IDs"); + System.out.println("Optional parameters:"); + System.out.println(" -auth Authentication config file"); + System.out.println(" -es Elasticsearch URL. Default is http://localhost:9200"); + System.out.println(" -index Elasticsearch index name. Default is 'registry'"); + System.out.println(); + } + +} diff --git a/src/main/java/gov/nasa/pds/registry/mgr/dao/RegistryDao.java b/src/main/java/gov/nasa/pds/registry/mgr/dao/RegistryDao.java new file mode 100644 index 0000000..925ebbf --- /dev/null +++ b/src/main/java/gov/nasa/pds/registry/mgr/dao/RegistryDao.java @@ -0,0 +1,125 @@ +package gov.nasa.pds.registry.mgr.dao; + +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Collection; +import java.util.Map; +import java.util.Set; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.client.Request; +import org.elasticsearch.client.Response; +import org.elasticsearch.client.RestClient; + +import gov.nasa.pds.registry.common.es.client.SearchResponseParser; +import gov.nasa.pds.registry.common.es.dao.BulkResponseParser; +import gov.nasa.pds.registry.common.util.CloseUtils; +import gov.nasa.pds.registry.mgr.dao.resp.GetAltIdsParser; + +/** + * Data access object + * @author karpenko + */ +public class RegistryDao +{ + private Logger log; + + private RestClient client; + private String indexName; + + private boolean pretty = false; + + /** + * Constructor + * @param client Elasticsearch client + * @param indexName Elasticsearch index + */ + public RegistryDao(RestClient client, String indexName) + { + log = LogManager.getLogger(this.getClass()); + + this.client = client; + this.indexName = indexName; + } + + + /** + * Generate pretty JSONs for debugging + * @param b boolean flag + */ + public void setPretty(boolean b) + { + this.pretty = b; + } + + + /** + * Get product's alternative IDs by primary key + * @param ids primary keys (usually LIDVIDs) + * @return ID map: key = product primary key (usually LIDVID), value = set of alternate IDs + * @throws Exception an exception + */ + public Map> getAlternateIds(Collection ids) throws Exception + { + if(ids == null || ids.isEmpty()) return null; + + RegistryRequestBuilder bld = new RegistryRequestBuilder(); + String jsonReq = bld.createGetAlternateIdsRequest(ids); + + String reqUrl = "/" + indexName + "/_search"; + if(pretty) reqUrl += "?pretty"; + + Request req = new Request("GET", reqUrl); + req.setJsonEntity(jsonReq); + Response resp = client.performRequest(req); + + //DebugUtils.dumpResponseBody(resp); + + GetAltIdsParser cb = new GetAltIdsParser(); + SearchResponseParser parser = new SearchResponseParser(); + parser.parseResponse(resp, cb); + + return cb.getIdMap(); + } + + + /** + * Update alternate IDs by primary keys + * @param newIds ID map: key = product primary key (usually LIDVID), + * value = additional alternate IDs to be added to existing alternate IDs. + * @throws Exception an exception + */ + public void updateAlternateIds(Map> newIds) throws Exception + { + if(newIds == null || newIds.isEmpty()) return; + + RegistryRequestBuilder bld = new RegistryRequestBuilder(); + String json = bld.createUpdateAltIdsRequest(newIds); + log.debug("Request:\n" + json); + + String reqUrl = "/" + indexName + "/_bulk"; //?refresh=wait_for"; + Request req = new Request("POST", reqUrl); + req.setJsonEntity(json); + + Response resp = client.performRequest(req); + + // Check for Elasticsearch errors. + InputStream is = null; + InputStreamReader rd = null; + try + { + is = resp.getEntity().getContent(); + rd = new InputStreamReader(is); + + BulkResponseParser parser = new BulkResponseParser(); + parser.parse(rd); + } + finally + { + CloseUtils.close(rd); + CloseUtils.close(is); + } + } + +} diff --git a/src/main/java/gov/nasa/pds/registry/mgr/dao/RegistryManager.java b/src/main/java/gov/nasa/pds/registry/mgr/dao/RegistryManager.java index c900549..743ddca 100644 --- a/src/main/java/gov/nasa/pds/registry/mgr/dao/RegistryManager.java +++ b/src/main/java/gov/nasa/pds/registry/mgr/dao/RegistryManager.java @@ -23,6 +23,7 @@ public class RegistryManager private RestClient esClient; private SchemaDao schemaDao; private DataDictionaryDao dataDictionaryDao; + private RegistryDao registryDao; /** @@ -47,7 +48,8 @@ private RegistryManager(RegistryCfg cfg) throws Exception log.info("Registry index: " + indexName); schemaDao = new SchemaDao(esClient, indexName); - dataDictionaryDao = new DataDictionaryDao(esClient, indexName); + dataDictionaryDao = new DataDictionaryDao(esClient, indexName); + registryDao = new RegistryDao(esClient, indexName); } @@ -103,4 +105,14 @@ public DataDictionaryDao getDataDictionaryDao() return dataDictionaryDao; } + + /** + * Get registry DAO object. + * @return Registry DAO + */ + public RegistryDao getRegistryDao() + { + return registryDao; + } + } diff --git a/src/main/java/gov/nasa/pds/registry/mgr/dao/RegistryRequestBuilder.java b/src/main/java/gov/nasa/pds/registry/mgr/dao/RegistryRequestBuilder.java index 2f99f24..60fbbd9 100644 --- a/src/main/java/gov/nasa/pds/registry/mgr/dao/RegistryRequestBuilder.java +++ b/src/main/java/gov/nasa/pds/registry/mgr/dao/RegistryRequestBuilder.java @@ -5,7 +5,9 @@ import java.io.IOException; import java.io.StringWriter; import java.io.Writer; +import java.util.Collection; import java.util.Map; +import java.util.Set; import java.util.TreeMap; import com.google.gson.Gson; @@ -269,27 +271,93 @@ public String createMatchAllQuery() throws IOException /** - * Build update label status request - * @param status new PDS label status - * @param field filter field name - * @param value filter value + * Build a query to select alternate ids by document primary key + * @param ids list of primary keys (lidvids right now) * @return JSON - * @throws IOException an exception + * @throws Exception an exception */ - public String createUpdateStatusRequest(String status, String field, String value) throws IOException + public String createGetAlternateIdsRequest(Collection ids) throws Exception { + if(ids == null || ids.isEmpty()) throw new Exception("Missing ids"); + StringWriter out = new StringWriter(); JsonWriter writer = createJsonWriter(out); + // Create ids query writer.beginObject(); - // Script - writer.name("script").value("ctx._source.archive_status = '" + status + "'"); - // Query - EsQueryUtils.appendFilterQuery(writer, field, value); + + // Exclude source from response + writer.name("_source").value("alternate_ids"); + writer.name("size").value(ids.size()); + + writer.name("query"); + writer.beginObject(); + writer.name("ids"); + writer.beginObject(); + + writer.name("values"); + writer.beginArray(); + for(String id: ids) + { + writer.value(id); + } + writer.endArray(); + + writer.endObject(); + writer.endObject(); writer.endObject(); writer.close(); return out.toString(); } + + public String createUpdateAltIdsRequest(Map> newIds) throws Exception + { + if(newIds == null || newIds.isEmpty()) throw new IllegalArgumentException("Missing ids"); + + StringBuilder bld = new StringBuilder(); + + // Build NJSON (new-line delimited JSON) + for(Map.Entry> entry: newIds.entrySet()) + { + // Line 1: Elasticsearch document ID + bld.append("{ \"update\" : {\"_id\" : \"" + entry.getKey() + "\" } }\n"); + + // Line 2: Data + String dataJson = buildUpdateDocJson("alternate_ids", entry.getValue()); + bld.append(dataJson); + bld.append("\n"); + } + + return bld.toString(); + + } + + + private String buildUpdateDocJson(String field, Collection values) throws Exception + { + StringWriter out = new StringWriter(); + JsonWriter writer = createJsonWriter(out); + + writer.beginObject(); + + writer.name("doc"); + writer.beginObject(); + + writer.name(field); + + writer.beginArray(); + for(String value: values) + { + writer.value(value); + } + writer.endArray(); + + writer.endObject(); + writer.endObject(); + + writer.close(); + return out.toString(); + } } diff --git a/src/main/java/gov/nasa/pds/registry/mgr/dao/resp/GetAltIdsParser.java b/src/main/java/gov/nasa/pds/registry/mgr/dao/resp/GetAltIdsParser.java new file mode 100644 index 0000000..6209ac4 --- /dev/null +++ b/src/main/java/gov/nasa/pds/registry/mgr/dao/resp/GetAltIdsParser.java @@ -0,0 +1,58 @@ +package gov.nasa.pds.registry.mgr.dao.resp; + +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; + +import gov.nasa.pds.registry.common.es.client.SearchResponseParser; + + +public class GetAltIdsParser implements SearchResponseParser.Callback +{ + private Map> map; + + + public GetAltIdsParser() + { + map = new TreeMap<>(); + } + + + public Map> getIdMap() + { + return map; + } + + + @Override + public void onRecord(String id, Object rec) throws Exception + { + if(rec instanceof Map) + { + Object obj = ((Map)rec).get("alternate_ids"); + if(obj == null) return; + + // Multiple values + if(obj instanceof List) + { + Set altIds = new TreeSet<>(); + for(Object item: (List)obj) + { + altIds.add(item.toString()); + } + + map.put(id, altIds); + } + // Single value + else if(obj instanceof String) + { + Set altIds = new TreeSet<>(); + altIds.add((String)obj); + map.put(id, altIds); + } + } + } + +} diff --git a/src/main/resources/elastic/registry.json b/src/main/resources/elastic/registry.json index b95a5e0..f1a98b7 100644 --- a/src/main/resources/elastic/registry.json +++ b/src/main/resources/elastic/registry.json @@ -29,6 +29,7 @@ "lid": { "type": "keyword" }, "vid": { "type": "float" }, "lidvid": { "type": "keyword" }, + "alternate_ids": { "type": "keyword" }, "title": {"type": "text", "analyzer": "english"}, "description": {"type": "text", "analyzer": "english"}, diff --git a/src/test/java/tt/TestEsQueryBuilder.java b/src/test/java/tt/TestEsQueryBuilder.java index 3923cd2..7a5a8c1 100644 --- a/src/test/java/tt/TestEsQueryBuilder.java +++ b/src/test/java/tt/TestEsQueryBuilder.java @@ -11,9 +11,6 @@ public static void main(String[] args) throws Exception testDelete(); System.out.println(); - - testUpdateStatus(); - System.out.println(); } @@ -33,10 +30,4 @@ private static void testDelete() throws Exception } - private static void testUpdateStatus() throws Exception - { - RegistryRequestBuilder bld = new RegistryRequestBuilder(true); - String json = bld.createUpdateStatusRequest("STAGED", "lidvid", "test::1.0"); - System.out.println(json); - } } diff --git a/src/test/java/tt/TestRegistryDao.java b/src/test/java/tt/TestRegistryDao.java new file mode 100644 index 0000000..b73733d --- /dev/null +++ b/src/test/java/tt/TestRegistryDao.java @@ -0,0 +1,33 @@ +package tt; + +import java.util.Arrays; +import java.util.Map; +import java.util.Set; + +import org.elasticsearch.client.RestClient; + +import gov.nasa.pds.registry.common.es.client.EsClientFactory; +import gov.nasa.pds.registry.mgr.dao.RegistryDao; + + +public class TestRegistryDao +{ + + public static void main(String[] args) throws Exception + { + RestClient client = EsClientFactory.createRestClient("localhost", null); + + try + { + RegistryDao dao = new RegistryDao(client, "registry"); + Map> idMap = dao.getAlternateIds(Arrays.asList("urn:nasa:pds:context:instrument:vg1.crs::1.0")); + Set altIds = idMap.get("urn:nasa:pds:context:instrument:vg1.crs::1.0"); + System.out.println(altIds); + } + finally + { + client.close(); + } + } + +}