Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BXC-4603 group by multiple fields #95

Merged
merged 8 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import java.nio.file.Path;

import edu.unc.lib.boxc.migration.cdm.options.GroupMappingSyncOptions;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;

import edu.unc.lib.boxc.migration.cdm.exceptions.MigrationException;
Expand Down Expand Up @@ -120,7 +119,7 @@ private void initialize() throws IOException {
}

private void validateOptions(GroupMappingOptions options) {
if (options.getGroupField().size() == 0) {
if (options.getGroupFields().isEmpty()) {
throw new IllegalArgumentException("Must provide an group field name");
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public class GroupMappingOptions {
description = {
"Name(s) of the CDM export field to perform grouping on."},
defaultValue = "file")
private List<String> groupField;
private List<String> groupFields;

@Option(names = {"-u", "--update"},
description = {
Expand All @@ -33,12 +33,12 @@ public class GroupMappingOptions {
description = "Overwrite mapping file if one already exists")
private boolean force;

public List<String> getGroupField() {
return groupField;
public List<String> getGroupFields() {
return groupFields;
}

public void setGroupField(List<String> groupField) {
this.groupField = groupField;
public void setGroupFields(List<String> groupFields) {
this.groupFields = groupFields;
}

public boolean getUpdate() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,7 @@ public void generateMapping(GroupMappingOptions options) throws Exception {

// Return set of all group keys that have at least 2 records in them
var multiMemberGroupSet = new HashSet<String>();
if (options.getGroupField().size() == 1) {
generateOneGroupMapping(options, stmt, multiMemberGroupSet, csvPrinter);
} else if (options.getGroupField().size() >= 2) {
generateMultipleGroupMapping(options, stmt, multiMemberGroupSet, csvPrinter);
}
generateMultipleGroupMapping(options, stmt, multiMemberGroupSet, csvPrinter);
} catch (SQLException e) {
throw new MigrationException("Error interacting with export index", e);
} finally {
Expand All @@ -109,58 +105,28 @@ public void generateMapping(GroupMappingOptions options) throws Exception {
}
}

private void generateOneGroupMapping(GroupMappingOptions options, Statement stmt,
HashSet<String> multiMemberGroupSet, CSVPrinter csvPrinter) throws Exception {
ResultSet groupRs = stmt.executeQuery("select " + options.getGroupField().get(0)
+ " from " + CdmIndexService.TB_NAME
+ " where " + CdmIndexService.ENTRY_TYPE_FIELD + " is null"
+ " group by " + options.getGroupField().get(0)
+ " having count(*) > 1");
while (groupRs.next()) {
var groupValue = groupRs.getString(1);
if (StringUtils.isBlank(groupValue)) {
continue;
}
multiMemberGroupSet.add(groupValue);
}

ResultSet rs = stmt.executeQuery("select " + CdmFieldInfo.CDM_ID + ", " + options.getGroupField().get(0)
+ " from " + CdmIndexService.TB_NAME
+ " where " + CdmIndexService.ENTRY_TYPE_FIELD + " is null"
+ " order by " + CdmFieldInfo.CDM_ID + " ASC");
while (rs.next()) {
String cdmId = rs.getString(1);
String matchedValue = rs.getString(2);

// Add empty mapping for records either not in groups or in groups with fewer than 2 members
if (StringUtils.isBlank(matchedValue) || !multiMemberGroupSet.contains(matchedValue)) {
log.debug("No matching field for object {}", cdmId);
csvPrinter.printRecord(cdmId, null);
continue;
}

String groupKey = GroupMappingInfo.GROUPED_WORK_PREFIX + options.getGroupField().get(0) + ":" + matchedValue;
csvPrinter.printRecord(cdmId, groupKey);
}
}

private void generateMultipleGroupMapping(GroupMappingOptions options, Statement stmt,
HashSet<String> multiMemberGroupSet, CSVPrinter csvPrinter) throws Exception {
int numberGroups = options.getGroupField().size();
String multipleGroups = String.join(", ", options.getGroupField());
Set<String> multiMemberGroupSet, CSVPrinter csvPrinter) throws Exception {
int numberGroups = options.getGroupFields().size();
String multipleGroups = String.join(", ", options.getGroupFields());

ResultSet groupRs = stmt.executeQuery("select " + multipleGroups
+ " from " + CdmIndexService.TB_NAME
+ " where " + CdmIndexService.ENTRY_TYPE_FIELD + " is null"
+ " group by " + multipleGroups
+ " having count(*) > 1");
while (groupRs.next()) {
List<String> groupValues = new ArrayList<>();
for (int i = 1; i < numberGroups + 1; i++) {
var groupValue = groupRs.getString(i);
if (StringUtils.isBlank(groupValue)) {
continue;
}
multiMemberGroupSet.add(groupValue);
groupValues.add(groupValue);
}
if (!groupValues.isEmpty()) {
krwong marked this conversation as resolved.
Show resolved Hide resolved
var multipleGroupValues = String.join(",", groupValues);
multiMemberGroupSet.add(multipleGroupValues);
}
}

Expand All @@ -177,17 +143,23 @@ private void generateMultipleGroupMapping(GroupMappingOptions options, Statement
matchedValues.add(matchedValue);
}
}
// Join matched values when grouping by multiple fields
String multipleMatchedValues = null;
if (numberGroups > 1 && !matchedValues.isEmpty()) {
krwong marked this conversation as resolved.
Show resolved Hide resolved
multipleMatchedValues = String.join(",", matchedValues);
}

// Add empty mapping for records either not in groups or in groups with fewer than 2 members
if (matchedValues.isEmpty() || !matchedValues.containsAll(multiMemberGroupSet)) {
log.debug("No matching field for object {}", cdmId);
csvPrinter.printRecord(cdmId, null);
continue;
}
// Add empty mapping for records either not in groups or in groups with fewer than 2 members
if (matchedValues.isEmpty() || (numberGroups == 1 && !multiMemberGroupSet.containsAll(matchedValues))
|| (numberGroups >= 2 && !multiMemberGroupSet.contains(multipleMatchedValues))) {
log.debug("No matching field for object {}", cdmId);
csvPrinter.printRecord(cdmId, null);
continue;
}

List<String> listGroups = new ArrayList<>();
for (int i = 0; i < numberGroups; i++) {
listGroups.add(options.getGroupField().get(i) + ":" + matchedValues.get(i));
listGroups.add(options.getGroupFields().get(i) + ":" + matchedValues.get(i));
}
String groupKey = GroupMappingInfo.GROUPED_WORK_PREFIX + String.join(",", listGroups);
csvPrinter.printRecord(cdmId, groupKey);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ private void assertMappingCount(int count) throws IOException {

private void setupGroupedIndex() throws Exception {
var options = new GroupMappingOptions();
options.setGroupField(Arrays.asList("groupa"));
options.setGroupFields(Arrays.asList("groupa"));
testHelper.getGroupMappingService().generateMapping(options);
var syncOptions = new GroupMappingSyncOptions();
syncOptions.setSortField("file");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ public void generateWithMatchAddToBottomTest() throws Exception {

private void setupGroupedIndex() throws Exception {
var options = new GroupMappingOptions();
options.setGroupField(Arrays.asList("groupa"));
options.setGroupFields(Arrays.asList("groupa"));
testHelper.getGroupMappingService().generateMapping(options);
var syncOptions = new GroupMappingSyncOptions();
syncOptions.setSortField("file");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ public void generateForceRunTest() throws Exception {
assertGroupAMappingsPresent(info);

options.setForce(true);
options.setGroupField(Arrays.asList("digitc"));
options.setGroupFields(Arrays.asList("digitc"));

service.generateMapping(options);

Expand Down Expand Up @@ -181,7 +181,7 @@ public void generateUpdateRunTest() throws Exception {
assertEquals(5, info.getMappings().size());

options.setUpdate(true);
options.setGroupField(Arrays.asList("digitc"));
options.setGroupFields(Arrays.asList("digitc"));

service.generateMapping(options);

Expand All @@ -201,7 +201,7 @@ public void generateUpdateRunTest() throws Exception {
public void generateUpdateForceRunTest() throws Exception {
indexExportSamples();
GroupMappingOptions options = makeDefaultOptions();
options.setGroupField(Arrays.asList("digitc"));
options.setGroupFields(Arrays.asList("digitc"));
service.generateMapping(options);

GroupMappingInfo info = service.loadMappings();
Expand All @@ -214,7 +214,7 @@ public void generateUpdateForceRunTest() throws Exception {

options.setUpdate(true);
options.setForce(true);
options.setGroupField(Arrays.asList("groupa"));
options.setGroupFields(Arrays.asList("groupa"));
service.generateMapping(options);

GroupMappingInfo info2 = service.loadMappings();
Expand Down Expand Up @@ -305,7 +305,7 @@ public void syncSingleRunTest() throws Exception {
public void syncSecondRunWithCleanupTest() throws Exception {
indexExportSamples();
GroupMappingOptions options = makeDefaultOptions();
options.setGroupField(Arrays.asList("digitc"));
options.setGroupFields(Arrays.asList("digitc"));
service.generateMapping(options);

service.syncMappings(makeDefaultSyncOptions());
Expand All @@ -326,7 +326,7 @@ public void syncSecondRunWithCleanupTest() throws Exception {
CdmIndexService.closeDbConnection(conn);
}

options.setGroupField(Arrays.asList("groupa"));
options.setGroupFields(Arrays.asList("groupa"));
options.setForce(true);
service.generateMapping(options);

Expand Down Expand Up @@ -356,7 +356,7 @@ public void syncSecondRunWithCleanupTest() throws Exception {
public void generateSingleRunMultipleGroupsTest() throws Exception {
indexExportSamples();
GroupMappingOptions options = new GroupMappingOptions();
options.setGroupField(Arrays.asList("groupa", "dcmi"));
options.setGroupFields(Arrays.asList("groupa", "dcmi"));
service.generateMapping(options);

GroupMappingInfo info = service.loadMappings();
Expand All @@ -378,7 +378,7 @@ public void generateSingleRunMultipleGroupsTest() throws Exception {
public void syncSingleRunMultipleGroupsTest() throws Exception {
indexExportSamples();
GroupMappingOptions options = new GroupMappingOptions();
options.setGroupField(Arrays.asList("groupa", "dcmi"));
options.setGroupFields(Arrays.asList("groupa", "dcmi"));
service.generateMapping(options);

service.syncMappings(makeDefaultSyncOptions());
Expand Down Expand Up @@ -412,7 +412,7 @@ public void generateMultipleGroupsForceUpdateRunTest() throws Exception {

options.setUpdate(true);
options.setForce(true);
options.setGroupField(Arrays.asList("groupa", "dcmi"));
options.setGroupFields(Arrays.asList("groupa", "dcmi"));

service.generateMapping(options);

Expand Down Expand Up @@ -513,7 +513,7 @@ private String asGroupKey(String matchValue) {

private GroupMappingOptions makeDefaultOptions() {
GroupMappingOptions options = new GroupMappingOptions();
options.setGroupField(Arrays.asList("groupa"));
options.setGroupFields(Arrays.asList("groupa"));
return options;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ private void assertMappingPresent(PermissionsInfo info, String cdmid, String eve

private void setupGroupedIndex() throws Exception {
var options = new GroupMappingOptions();
options.setGroupField(Arrays.asList("groupa"));
options.setGroupFields(Arrays.asList("groupa"));
testHelper.getGroupMappingService().generateMapping(options);
var syncOptions = new GroupMappingSyncOptions();
syncOptions.setSortField("file");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1498,7 +1498,7 @@ private GroupMappingSyncOptions makeDefaultSyncOptions() {

private void setupGroupIndex() throws Exception {
GroupMappingOptions groupOptions = new GroupMappingOptions();
groupOptions.setGroupField(Arrays.asList("groupa"));
groupOptions.setGroupFields(Arrays.asList("groupa"));
GroupMappingService groupService = testHelper.getGroupMappingService();
groupService.generateMapping(groupOptions);
groupService.syncMappings(makeDefaultSyncOptions());
Expand Down
Loading