Skip to content

Commit

Permalink
Ensure columns are recreated on dataset creation (#3827)
Browse files Browse the repository at this point in the history
  • Loading branch information
kbirk authored Jun 12, 2024
1 parent b4d1ebb commit d69a795
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 118 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,7 @@
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import io.swagger.v3.oas.annotations.responses.ApiResponses;
import jakarta.transaction.Transactional;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
Expand All @@ -27,9 +23,6 @@
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.http.HttpEntity;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.entity.ContentType;
Expand Down Expand Up @@ -57,7 +50,6 @@
import software.uncharted.terarium.hmiserver.models.dataservice.ResponseDeleted;
import software.uncharted.terarium.hmiserver.models.dataservice.ResponseStatus;
import software.uncharted.terarium.hmiserver.models.dataservice.dataset.Dataset;
import software.uncharted.terarium.hmiserver.models.dataservice.dataset.DatasetColumn;
import software.uncharted.terarium.hmiserver.proxies.climatedata.ClimateDataProxy;
import software.uncharted.terarium.hmiserver.proxies.jsdelivr.JsDelivrProxy;
import software.uncharted.terarium.hmiserver.security.Roles;
Expand Down Expand Up @@ -246,45 +238,6 @@ public ResponseEntity<Dataset> getDataset(
}
}

/**
 * Extracts columns from the dataset if they are not already set and saves the dataset.
 *
 * @param dataset dataset to extract columns from
 * @param hasWritePermission permission used when persisting the updated dataset
 * @return the dataset with columns extracted and saved
 * @throws IOException if there is an issue saving the dataset after extracting columns
 */
private Dataset extractColumnsAsNeededAndSave(final Dataset dataset, final Schema.Permission hasWritePermission)
		throws IOException {
	if (dataset.getColumns() != null && !dataset.getColumns().isEmpty()) {
		// columns are already populated; no need to extract
		return dataset;
	}
	// BUG FIX: the original tested `getFileNames() != null ||`, which returned early
	// whenever file names WERE present (so extraction never ran) and threw an NPE on
	// isEmpty() when they were null. The guard must be `== null ||`.
	if (dataset.getFileNames() == null || dataset.getFileNames().isEmpty()) {
		// no file names to extract columns from
		return dataset;
	}

	for (final String filename : dataset.getFileNames()) {
		if (filename.endsWith(".nc")) {
			// NetCDF files have no CSV header to extract; original behavior is to
			// bail out entirely (without saving) when one is encountered — preserved.
			return dataset;
		}
		try {
			final List<List<String>> csv = getCSVFile(filename, dataset.getId(), 1);
			if (csv == null || csv.isEmpty()) {
				continue;
			}
			// the first row of the CSV is the header row
			updateHeaders(dataset, csv.get(0));
		} catch (final IOException e) {
			// best-effort per file: log the failure and move on to the next file
			final String error = "Unable to get dataset CSV for file " + filename;
			log.error(error, e);
		}
	}

	return datasetService.updateAsset(dataset, hasWritePermission).get();
}

@DeleteMapping("/{id}")
@Secured(Roles.USER)
@Operation(summary = "Deletes a dataset")
Expand Down Expand Up @@ -384,7 +337,7 @@ public ResponseEntity<CsvAsset> getCsv(

final List<List<String>> csv;
try {
csv = getCSVFile(filename, datasetId, limit);
csv = datasetService.getCSVFile(filename, datasetId, limit);
if (csv == null) {
final String error = "Unable to get CSV";
log.error(error);
Expand All @@ -411,32 +364,6 @@ public ResponseEntity<CsvAsset> getCsv(
return ResponseEntity.ok(csvAsset);
}

/**
 * Fetches a stored dataset file and parses it as CSV.
 *
 * @param filename name of the file within the dataset
 * @param datasetId id of the dataset that owns the file
 * @param limit maximum number of data rows to keep after the header; a value <= 0 reads all rows
 * @return the parsed rows, or null if the file could not be fetched
 * @throws IOException if reading or CSV parsing fails
 */
private List<List<String>> getCSVFile(final String filename, final UUID datasetId, final Integer limit)
		throws IOException {

	final Optional<byte[]> bytes = datasetService.fetchFileAsBytes(datasetId, filename);
	if (bytes.isEmpty()) {
		// file not found in storage
		return null;
	}

	// NOTE(review): InputStreamReader uses the platform default charset here, matching
	// the original code — confirm stored CSVs are not required to be UTF-8.
	final StringBuilder rawCSV = new StringBuilder();
	// try-with-resources so the reader is always closed (the original leaked it);
	// StringBuilder replaces O(n^2) String concatenation in the loop.
	try (final BufferedReader reader =
			new BufferedReader(new InputStreamReader(new ByteArrayInputStream(bytes.get())))) {
		String line;
		int count = 0;
		while ((line = reader.readLine()) != null) {
			// a positive limit keeps the header plus `limit` data rows (original semantics)
			if (limit > 0 && count > limit) {
				break;
			}
			rawCSV.append(line).append('\n');
			count++;
		}
	}

	return csvToRecords(rawCSV.toString());
}

@GetMapping("/{id}/download-file")
@Secured(Roles.USER)
@Operation(summary = "Download an arbitrary dataset file")
Expand Down Expand Up @@ -658,7 +585,7 @@ public ResponseEntity<ResponseStatus> uploadCsv(
description = "There was an issue uploading the file",
content = @Content)
})
public ResponseEntity<Void> uploadData( // HttpServletRequest request,
public ResponseEntity<Void> uploadData(
@PathVariable("id") final UUID datasetId,
@RequestParam("filename") final String filename,
@RequestPart("file") final MultipartFile input,
Expand All @@ -684,7 +611,7 @@ public ResponseEntity<Void> uploadData( // HttpServletRequest request,
}

try {
updatedDataset = Optional.of(extractColumnsAsNeededAndSave(updatedDataset.get(), permission));
updatedDataset = Optional.of(datasetService.extractColumnsFromFiles(updatedDataset.get()));
} catch (final IOException e) {
final String error = "Unable to extract columns from dataset";
log.error(error, e);
Expand Down Expand Up @@ -767,7 +694,7 @@ private ResponseEntity<ResponseStatus> uploadCSVAndUpdateColumns(
return ResponseEntity.internalServerError().build();
}

updateHeaders(updatedDataset.get(), Arrays.asList(headers));
datasetService.addDatasetColumns(updatedDataset.get(), filename, Arrays.asList(headers));

// add the filename to existing file names
if (!updatedDataset.get().getFileNames().contains(filename)) {
Expand All @@ -786,28 +713,6 @@ private ResponseEntity<ResponseStatus> uploadCSVAndUpdateColumns(
}
}

/** Appends one annotation-less {@link DatasetColumn} per header name to the dataset's column list. */
private static void updateHeaders(final Dataset dataset, final List<String> headers) {
	List<DatasetColumn> columns = dataset.getColumns();
	if (columns == null) {
		// lazily initialize the column list on first use
		columns = new ArrayList<>();
		dataset.setColumns(columns);
	}
	for (final String name : headers) {
		columns.add(new DatasetColumn().setName(name).setAnnotations(new ArrayList<>()));
	}
}

/**
 * Parses a raw CSV string into a list of rows, each row being a list of cell values.
 *
 * @param rawCsvString the CSV text to parse
 * @return one inner list of cell strings per CSV record
 * @throws IOException if the CSV text cannot be parsed
 */
private static List<List<String>> csvToRecords(final String rawCsvString) throws IOException {
	final List<List<String>> records = new ArrayList<>();
	// parser is closed automatically once all records have been consumed
	try (final CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(rawCsvString))) {
		for (final CSVRecord row : parser) {
			final List<String> cells = new ArrayList<>();
			for (final String cell : row) {
				cells.add(cell);
			}
			records.add(cells);
		}
	}
	return records;
}

private static List<String> getColumn(final List<List<String>> matrix, final int columnNumber) {
final List<String> column = new ArrayList<>();
for (final List<String> strings : matrix) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ public class DatasetColumn extends TerariumEntity {
@JsonBackReference
private Dataset dataset;

@Column(length = 255)
private String fileName;

/**
* Datatype. One of: unknown, boolean, string, char, integer, int, float, double, timestamp, datetime, date, time
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.Stack;
import java.util.UUID;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
Expand Down Expand Up @@ -145,6 +147,11 @@ public List<TerariumAsset> cloneAndPersistAsset(final UUID projectId, final UUID
return res;
}

/** Returns a copy of the list with duplicates removed, preserving first-occurrence order. */
private List<String> removeDuplicates(final List<String> list) {
	// LinkedHashSet de-duplicates while keeping insertion order, exactly as the
	// stream/Collectors version did, without the pipeline ceremony.
	return new ArrayList<>(new LinkedHashSet<>(list));
}

/**
* Given a project, clone all assets and download all related files. Return everything as a singular ProjectExport
* object.
Expand Down Expand Up @@ -185,6 +192,9 @@ public ProjectExport exportProject(final UUID projectId) throws IOException {

final TerariumAsset currentAsset = currentAssetOptional.get();

// clean up any duplicate filenames from legacy data
currentAsset.setFileNames(removeDuplicates(currentAsset.getFileNames()));

final Map<String, FileExport> files =
terariumAssetService.exportAssetFiles(currentProjectAsset.getAssetId(), Schema.Permission.READ);

Expand Down Expand Up @@ -229,11 +239,9 @@ public Project importProject(final String userId, final String userName, final P

final ITerariumAssetService terariumAssetService = terariumAssetServices.getServiceByType(assetType);

// create the asset
final TerariumAsset asset =
(TerariumAsset) terariumAssetService.createAsset(assetExport.getAsset(), Schema.Permission.WRITE);
TerariumAsset asset = assetExport.getAsset();

// upload the files
// upload the files (do this first as the asset creation may use the files)
for (final Map.Entry<String, FileExport> entry :
assetExport.getFiles().entrySet()) {
final String fileName = entry.getKey();
Expand All @@ -242,6 +250,9 @@ public Project importProject(final String userId, final String userName, final P
asset.getId(), fileName, fileExport.getContentType(), fileExport.getBytes());
}

// create the asset
asset = (TerariumAsset) terariumAssetService.createAsset(assetExport.getAsset(), Schema.Permission.WRITE);

// add the asset to the project
final Optional<ProjectAsset> projectAsset =
projectAssetService.createProjectAsset(project, assetType, asset, Schema.Permission.WRITE);
Expand Down
Loading

0 comments on commit d69a795

Please sign in to comment.