Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat/met 5806 support embeddable resources profile #675

Merged
merged 16 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ public class RdfNamespaceContext implements NamespaceContext {
public static final String RDF_NAMESPACE_PREFIX = "rdf";
public static final String EDM_NAMESPACE_PREFIX = "edm";
public static final String ORE_NAMESPACE_PREFIX = "ore";
public static final String SVCS_NAMESPACE_PREFIX = "svcs";
public static final String DCTERMS_NAMESPACE_PREFIX = "dcterms";

private static final Map<String, String> PREFIX_TO_NAMESPACE_MAP = new HashMap<>();

Expand All @@ -30,6 +32,8 @@ public class RdfNamespaceContext implements NamespaceContext {
PREFIX_TO_NAMESPACE_MAP.put(RDF_NAMESPACE_PREFIX, "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
PREFIX_TO_NAMESPACE_MAP.put(ORE_NAMESPACE_PREFIX, "http://www.openarchives.org/ore/terms/");
PREFIX_TO_NAMESPACE_MAP.put(EDM_NAMESPACE_PREFIX, "http://www.europeana.eu/schemas/edm/");
PREFIX_TO_NAMESPACE_MAP.put(SVCS_NAMESPACE_PREFIX,"http://rdfs.org/sioc/services#");
PREFIX_TO_NAMESPACE_MAP.put(DCTERMS_NAMESPACE_PREFIX, "http://purl.org/dc/terms/");
}

@Override
Expand Down
5 changes: 5 additions & 0 deletions metis-media-service/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -130,5 +130,10 @@
<groupId>org.wiremock</groupId>
<artifactId>wiremock-standalone</artifactId>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package eu.europeana.metis.mediaprocessing;

/**
 * XPath expression constants for addressing elements and attributes of an RDF document.
 *
 * <p>All expressions are absolute and use the {@code rdf}, {@code ore} and {@code edm} prefixes.
 */
public final class RdfXpathConstants {

  /** Path suffix selecting the {@code rdf:resource} attribute of an element. */
  private static final String RDF_RESOURCE_ATTRIBUTE = "/@rdf:resource";

  /** The root {@code rdf:RDF} element. */
  public static final String RDF_NAMESPACE = "/rdf:RDF";

  /** The {@code ore:Aggregation} elements directly under the root element. */
  public static final String ORE_AGGREGATION = RDF_NAMESPACE + "/ore:Aggregation";

  /** The {@code rdf:resource} attribute of {@code edm:object} within an aggregation. */
  public static final String EDM_OBJECT = ORE_AGGREGATION + "/edm:object" + RDF_RESOURCE_ATTRIBUTE;

  /** The {@code rdf:resource} attribute of {@code edm:isShownBy} within an aggregation. */
  public static final String EDM_IS_SHOWN_BY = ORE_AGGREGATION + "/edm:isShownBy" + RDF_RESOURCE_ATTRIBUTE;

  /** The {@code rdf:resource} attribute of {@code edm:hasView} within an aggregation. */
  public static final String EDM_HAS_VIEW = ORE_AGGREGATION + "/edm:hasView" + RDF_RESOURCE_ATTRIBUTE;

  /** The {@code rdf:resource} attribute of {@code edm:isShownAt} within an aggregation. */
  public static final String EDM_IS_SHOWN_AT = ORE_AGGREGATION + "/edm:isShownAt" + RDF_RESOURCE_ATTRIBUTE;

  /** Constants holder: not meant to be instantiated. */
  private RdfXpathConstants() {
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,13 @@
class AudioVideoProcessor implements MediaProcessor {

private static final Logger LOGGER = LoggerFactory.getLogger(AudioVideoProcessor.class);
public static final int FFPROBE_MAX_VERSION = 7;
public static final int FFPROBE_MIN_VERSION = 2;

private static String globalFfprobeCommand;

private final CommandExecutor commandExecutor;

private final String ffprobeCommand;

/**
Expand Down Expand Up @@ -97,7 +100,7 @@ static String discoverFfprobeCommand(CommandExecutor commandExecutor)
int indexVersion = output.lastIndexOf("version ") + "version ".length();
int version = Character.isDigit(output.charAt(indexVersion)) ?
Integer.parseInt(String.valueOf(output.charAt(indexVersion))) : 0;
if (!(version >= 2 && version < 7)) {
if (!(version >= FFPROBE_MIN_VERSION && version < FFPROBE_MAX_VERSION)) {
throw new MediaProcessorException("ffprobe version " + version + ".x not found");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,5 @@ public ResourceExtractionResult copyMetadata(Resource resource, String detectedM
public boolean downloadResourceForFullProcessing() {
// Always false: this processor reports that it does not need the downloaded resource for full processing.
return false;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import org.apache.tika.io.TikaInputStream;
Expand Down Expand Up @@ -51,27 +52,27 @@ enum ProcessingMode {FULL, REDUCED, NONE}
private final AudioVideoProcessor audioVideoProcessor;
private final TextProcessor textProcessor;
private final Media3dProcessor media3dProcessor;
private final OEmbedProcessor oEmbedProcessor;

/**
* Constructor meant for testing purposes.
*
* @param resourceDownloadClient The download client for resources.
* @param mimeTypeDetectHttpClient The mime type detector for URLs.
* @param tika A tika instance.
* @param imageProcessor An image processor.
* @param audioVideoProcessor An audio/video processor.
* @param textProcessor A text processor.
* @param mediaProcessorList the media processor list
*/
MediaExtractorImpl(ResourceDownloadClient resourceDownloadClient,
MimeTypeDetectHttpClient mimeTypeDetectHttpClient, TikaWrapper tika, ImageProcessor imageProcessor,
AudioVideoProcessor audioVideoProcessor, TextProcessor textProcessor, Media3dProcessor media3dProcessor) {
MimeTypeDetectHttpClient mimeTypeDetectHttpClient, TikaWrapper tika,
List<MediaProcessor> mediaProcessorList) {
this.resourceDownloadClient = resourceDownloadClient;
this.mimeTypeDetectHttpClient = mimeTypeDetectHttpClient;
this.tika = tika;
this.imageProcessor = imageProcessor;
this.audioVideoProcessor = audioVideoProcessor;
this.textProcessor = textProcessor;
this.media3dProcessor = media3dProcessor;
this.imageProcessor = (ImageProcessor) getMediaProcessor(mediaProcessorList, ImageProcessor.class);
this.audioVideoProcessor = (AudioVideoProcessor) getMediaProcessor(mediaProcessorList, AudioVideoProcessor.class);
this.textProcessor = (TextProcessor) getMediaProcessor(mediaProcessorList, TextProcessor.class);
this.media3dProcessor = (Media3dProcessor) getMediaProcessor(mediaProcessorList, Media3dProcessor.class);
this.oEmbedProcessor = (OEmbedProcessor) getMediaProcessor(mediaProcessorList, OEmbedProcessor.class);
}

/**
Expand Down Expand Up @@ -102,6 +103,16 @@ public MediaExtractorImpl(int redirectCount, int thumbnailGenerateTimeout,
this.textProcessor = new TextProcessor(thumbnailGenerator,
new PdfToImageConverter(new CommandExecutor(thumbnailGenerateTimeout)));
this.media3dProcessor = new Media3dProcessor();
this.oEmbedProcessor = new OEmbedProcessor();
}

/**
 * Finds the first element in the given list that is an instance of the requested type.
 *
 * <p>The result is returned already typed as {@code T}, so callers do not need a downcast.
 * Null elements in the list never match.
 *
 * @param mediaProcessorList The candidates to search, in order. Must not be null.
 * @param type The type of processor to look for.
 * @param <T> The type of processor to look for.
 * @return The first element that is an instance of {@code type}, or null if there is none.
 */
private <T> T getMediaProcessor(List<?> mediaProcessorList, Class<T> type) {
  return mediaProcessorList.stream()
      .filter(type::isInstance)
      .map(type::cast)
      .findFirst()
      .orElse(null);
}

@Override
Expand Down Expand Up @@ -193,10 +204,10 @@ String detectType(Path path, String providedMimeType) throws IOException {
}
}

MediaProcessor chooseMediaProcessor(MediaType mediaType) {
MediaProcessor chooseMediaProcessor(MediaType mediaType, String detectedMimeType) {
final MediaProcessor processor;
switch (mediaType) {
case TEXT -> processor = textProcessor;
case TEXT, OTHER -> processor = chooseByDetectedMimeType(mediaType, detectedMimeType);
case AUDIO, VIDEO -> processor = audioVideoProcessor;
case IMAGE -> processor = imageProcessor;
case THREE_D -> processor = media3dProcessor;
Expand All @@ -205,6 +216,20 @@ MediaProcessor chooseMediaProcessor(MediaType mediaType) {
return processor;
}

/**
 * Selects a processor for text-like or otherwise unclassified media based on the detected mime type.
 *
 * <p>XML and JSON content of media type TEXT or OTHER goes to the oEmbed processor. Any remaining
 * TEXT content goes to the text processor. Everything else has no processor.
 *
 * @param mediaType The media type derived from the detected mime type.
 * @param detectedMimeType The detected mime type. Can be null.
 * @return The processor to use, or null if the mime type is null or no processor applies.
 */
MediaProcessor chooseByDetectedMimeType(MediaType mediaType, String detectedMimeType) {
  if (detectedMimeType == null) {
    return null;
  }

  final boolean isText = mediaType == MediaType.TEXT;
  final boolean isTextOrOther = isText || mediaType == MediaType.OTHER;
  final boolean isXmlOrJson = detectedMimeType.startsWith("text/xml")
      || detectedMimeType.startsWith("application/xml")
      || detectedMimeType.startsWith("application/json");

  if (isTextOrOther && isXmlOrJson) {
    return oEmbedProcessor;
  }
  if (isText) {
    return textProcessor;
  }
  return null;
}

void verifyAndCorrectContentAvailability(Resource resource, ProcessingMode mode,
String detectedMimeType) throws MediaExtractionException, IOException {

Expand Down Expand Up @@ -255,19 +280,32 @@ ResourceExtractionResult performProcessing(Resource resource, ProcessingMode mod
}

// Choose the right media processor.
final MediaProcessor processor = chooseMediaProcessor(MediaType.getMediaType(detectedMimeType));
MediaProcessor processor = chooseMediaProcessor(MediaType.getMediaType(detectedMimeType), detectedMimeType);

// Process the resource depending on the mode.
final ResourceExtractionResult result;
ResourceExtractionResult result;
if (processor == null) {
result = null;
} else if (mode == ProcessingMode.FULL) {
} else {
result = getResourceExtractionResult(resource, mode, mainThumbnailAvailable, processor, detectedMimeType);
}
// No oEmbed detected try with text processing
if (processor instanceof OEmbedProcessor && result == null) {
processor = textProcessor;
result = getResourceExtractionResult(resource, mode, mainThumbnailAvailable, processor, detectedMimeType);
}
// Done
return result;
}

/**
 * Runs the given processor on the resource in the way the processing mode requires.
 *
 * @param resource The resource to process.
 * @param mode The processing mode. FULL triggers metadata extraction; any other mode only copies metadata.
 * @param mainThumbnailAvailable Whether the main thumbnail for this record is available.
 * @param processor The processor to apply. Must not be null.
 * @param detectedMimeType The mime type that was detected for the resource.
 * @return The result as returned by the processor.
 * @throws MediaExtractionException In case the processor fails.
 */
private static ResourceExtractionResult getResourceExtractionResult(Resource resource, ProcessingMode mode,
    boolean mainThumbnailAvailable, MediaProcessor processor, String detectedMimeType) throws MediaExtractionException {
  if (mode == ProcessingMode.FULL) {
    return processor.extractMetadata(resource, detectedMimeType, mainThumbnailAvailable);
  }
  return processor.copyMetadata(resource, detectedMimeType);
}

Expand All @@ -281,7 +319,7 @@ public void close() throws IOException {
* @return true if and only if resources of the given type need to be downloaded before performing full processing.
*/
boolean shouldDownloadForFullProcessing(String mimeType) {
return Optional.of(MediaType.getMediaType(mimeType)).map(this::chooseMediaProcessor)
return Optional.of(MediaType.getMediaType(mimeType)).map(mediaType -> chooseMediaProcessor(mediaType, mimeType))
.map(MediaProcessor::downloadResourceForFullProcessing).orElse(Boolean.FALSE);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package eu.europeana.metis.mediaprocessing.extraction;

import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.checkValidWidthAndHeightDimensions;
import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.getDurationFromModel;
import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.getOEmbedModelFromJson;
import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.getOEmbedModelFromXml;
import static eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedValidation.isValidOEmbedPhotoOrVideo;

import eu.europeana.metis.mediaprocessing.exception.MediaExtractionException;
import eu.europeana.metis.mediaprocessing.extraction.oembed.OEmbedModel;
import eu.europeana.metis.mediaprocessing.model.ImageResourceMetadata;
import eu.europeana.metis.mediaprocessing.model.Resource;
import eu.europeana.metis.mediaprocessing.model.ResourceExtractionResult;
import eu.europeana.metis.mediaprocessing.model.ResourceExtractionResultImpl;
import eu.europeana.metis.mediaprocessing.model.VideoResourceMetadata;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Locale;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* The type Oembed processor.
*/
public class OEmbedProcessor implements MediaProcessor {

/**
* The constant LOGGER.
*/
private static final Logger LOGGER = LoggerFactory.getLogger(OEmbedProcessor.class);

/**
* Process a resource by extracting the metadata from the content.
*
* @param resource The resource to process. Note that the resource may not have content (see
* {@link MediaExtractorImpl#shouldDownloadForFullProcessing(String)}).
* @param detectedMimeType The mime type that was detected for this resource (may deviate from the mime type that was provided
* by the server and which is stored in {@link Resource#getProvidedMimeType()}).
* @param mainThumbnailAvailable Whether the main thumbnail for this record is available. This may influence the decision on
* whether to generate a thumbnail for this resource.
* @return The result of the processing.
* @throws MediaExtractionException In case something went wrong during the extraction.
*/
@Override
public ResourceExtractionResult extractMetadata(Resource resource, String detectedMimeType, boolean mainThumbnailAvailable)
throws MediaExtractionException {

ResourceExtractionResult resourceExtractionResult;
// the content for this oembed needs to be downloaded to be examined
if (resource.getContentPath() != null) {
try {
OEmbedModel embedModel = null;
if (detectedMimeType.startsWith("application/json")) {
jeortizquan marked this conversation as resolved.
Show resolved Hide resolved
embedModel = getOEmbedModelFromJson(Files.readAllBytes(Paths.get(resource.getContentPath().toString())));
} else if (detectedMimeType.startsWith("application/xml")) {
embedModel = getOEmbedModelFromXml(Files.readAllBytes(Paths.get(resource.getContentPath().toString())));
}
if (isValidOEmbedPhotoOrVideo(embedModel)) {
checkValidWidthAndHeightDimensions(embedModel, resource.getResourceUrl());
resourceExtractionResult = getResourceExtractionResult(resource, detectedMimeType, embedModel);
} else {
LOGGER.warn("No oembed model found");
resourceExtractionResult = null;
}
} catch (IOException e) {
throw new MediaExtractionException("Unable to read OEmbedded resource", e);
}
} else {
resourceExtractionResult = null;
}

return resourceExtractionResult;
}

/**
* Process a resource by copying the metadata from the input without performing any extraction.
*
* @param resource The resource to process. The resource is not expected to have content.
* @param detectedMimeType The mime type that was detected for this resource (may deviate from the mime type that was provided
* by the server and which is stored in {@link Resource#getProvidedMimeType()}).
* @return The result of the processing.
* @throws MediaExtractionException In case something went wrong during the extraction.
*/
@Override
public ResourceExtractionResult copyMetadata(Resource resource, String detectedMimeType) throws MediaExtractionException {
return null;
}

/**
* @return Whether the processor needs the downloaded resource for full processing.
*/
@Override
public boolean downloadResourceForFullProcessing() {
return true;
}

private ResourceExtractionResult getResourceExtractionResult(Resource resource, String detectedMimeType,
OEmbedModel oEmbedModel) throws MediaExtractionException {
ResourceExtractionResult resourceExtractionResult;
if (oEmbedModel != null) {
switch (oEmbedModel.getType().toLowerCase(Locale.US)) {
case "photo" -> {
ImageResourceMetadata imageResourceMetadata = new ImageResourceMetadata(detectedMimeType,
resource.getResourceUrl(),
resource.getProvidedFileSize(), oEmbedModel.getWidth(), oEmbedModel.getHeight(), null, null, null);
resourceExtractionResult = new ResourceExtractionResultImpl(imageResourceMetadata);
}
case "video" -> {
Double duration = getDurationFromModel(oEmbedModel);
VideoResourceMetadata videoResourceMetadata = new VideoResourceMetadata(detectedMimeType,
resource.getResourceUrl(),
resource.getProvidedFileSize(), duration, null, oEmbedModel.getWidth(), oEmbedModel.getHeight(), null, null);
resourceExtractionResult = new ResourceExtractionResultImpl(videoResourceMetadata);
}
default -> resourceExtractionResult = null;
}
} else {
resourceExtractionResult = null;
}
return resourceExtractionResult;
}
}
Loading