From af68e61f38b672734729fb77bc719d9827710ee6 Mon Sep 17 00:00:00 2001 From: Richard Treier Date: Wed, 10 Jul 2024 12:45:12 +0300 Subject: [PATCH] fix: crawler not handling descriptions properly (#993) --- .../migration/V9__Broker_Integration.sql | 32 ++-- .../CrawlerExtensionContextBuilder.java | 3 +- .../fetching/model/FetchedCatalog.java | 6 + .../fetching/model/FetchedContractOffer.java | 6 + .../fetching/model/FetchedDataOffer.java | 6 + .../data_offers/DataOfferRecordUpdater.java | 17 +- .../ext/catalog/crawler/CrawlerTestDb.java | 1 - .../writing/ConnectorSuccessWriterTest.java | 146 ++++++++++++++++++ .../writing/DataOfferWriterTestDydi.java | 19 ++- .../asset/utils/ShortDescriptionBuilder.java | 4 +- 10 files changed, 211 insertions(+), 29 deletions(-) create mode 100644 extensions/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorSuccessWriterTest.java diff --git a/extensions/catalog-crawler/catalog-crawler-db/src/main/resources/db-crawler/migration/V9__Broker_Integration.sql b/extensions/catalog-crawler/catalog-crawler-db/src/main/resources/db-crawler/migration/V9__Broker_Integration.sql index fe46c6525..a93987578 100644 --- a/extensions/catalog-crawler/catalog-crawler-db/src/main/resources/db-crawler/migration/V9__Broker_Integration.sql +++ b/extensions/catalog-crawler/catalog-crawler-db/src/main/resources/db-crawler/migration/V9__Broker_Integration.sql @@ -32,22 +32,22 @@ alter table connector -- Data offers, additionally keyed by env ID create table data_offer ( - connector_id text not null, - asset_id text not null, - ui_asset_json jsonb not null, - created_at timestamp with time zone not null, - updated_at timestamp with time zone, - asset_title text collate alphanumeric_with_natural_sort not null, - description text not null default ''::text, - curator_organization_name text not null default ''::text, - data_category text not null default ''::text, - data_subcategory text not null default ''::text, - data_model text not null default ''::text, - transport_mode text not null default ''::text, - geo_reference_method text not null default ''::text, - keywords text[] not null default '{}'::text[], - keywords_comma_joined text not null default ''::text, - version text not null default ''::text, + connector_id text not null, + asset_id text not null, + ui_asset_json jsonb not null, + created_at timestamp with time zone not null, + updated_at timestamp with time zone, + asset_title text collate alphanumeric_with_natural_sort not null, + description_no_markdown text not null default ''::text, + short_description_no_markdown text not null default ''::text, + data_category text not null default ''::text, + data_subcategory text not null default ''::text, + data_model text not null default ''::text, + transport_mode text not null default ''::text, + geo_reference_method text not null default ''::text, + keywords text[] not null default '{}'::text[], + keywords_comma_joined text not null default ''::text, + version text not null default ''::text, primary key (connector_id, asset_id), constraint data_offer_connector_fkey foreign key (connector_id) references connector (connector_id) ); diff --git a/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerExtensionContextBuilder.java b/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerExtensionContextBuilder.java index cb49d40a5..2437dd751 100644 --- a/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerExtensionContextBuilder.java +++ b/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerExtensionContextBuilder.java @@ -129,7 +129,8 @@ public static CrawlerExtensionContext buildContext( objectMapperJsonLd ); var contractOfferRecordUpdater = new ContractOfferRecordUpdater(); - var dataOfferRecordUpdater = new DataOfferRecordUpdater(connectorQueries); + var shortDescriptionBuilder = new ShortDescriptionBuilder(); + var dataOfferRecordUpdater = new DataOfferRecordUpdater(shortDescriptionBuilder); var contractOfferQueries = new ContractOfferQueries(); var dataOfferLimitsEnforcer = new DataOfferLimitsEnforcer(crawlerConfig, crawlerEventLogger); var dataOfferPatchBuilder = new CatalogPatchBuilder( diff --git a/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/model/FetchedCatalog.java b/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/model/FetchedCatalog.java index d930eb223..027d2c8b3 100644 --- a/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/model/FetchedCatalog.java +++ b/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/model/FetchedCatalog.java @@ -16,7 +16,10 @@ import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; import lombok.Getter; +import lombok.RequiredArgsConstructor; import lombok.Setter; import lombok.experimental.FieldDefaults; @@ -27,6 +30,9 @@ */ @Getter @Setter +@Builder +@RequiredArgsConstructor +@AllArgsConstructor @FieldDefaults(level = AccessLevel.PRIVATE) public class FetchedCatalog { ConnectorRef connectorRef; diff --git a/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/model/FetchedContractOffer.java b/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/model/FetchedContractOffer.java index 64317498e..2b1993749 100644 --- a/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/model/FetchedContractOffer.java +++ b/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/model/FetchedContractOffer.java @@ -15,12 +15,18 @@ package de.sovity.edc.ext.catalog.crawler.crawling.fetching.model; import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; import lombok.Getter; +import lombok.NoArgsConstructor; import lombok.Setter; import lombok.experimental.FieldDefaults; @Getter @Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor @FieldDefaults(level = AccessLevel.PRIVATE) public class FetchedContractOffer { String contractOfferId; diff --git a/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/model/FetchedDataOffer.java b/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/model/FetchedDataOffer.java index 752fc98fd..a2e6fe99b 100644 --- a/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/model/FetchedDataOffer.java +++ b/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/model/FetchedDataOffer.java @@ -16,7 +16,10 @@ import de.sovity.edc.ext.wrapper.api.common.model.UiAsset; import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; import lombok.Getter; +import lombok.NoArgsConstructor; import lombok.Setter; import lombok.experimental.FieldDefaults; @@ -27,6 +30,9 @@ */ @Getter @Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor @FieldDefaults(level = AccessLevel.PRIVATE) public class FetchedDataOffer { String assetId; diff --git a/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/data_offers/DataOfferRecordUpdater.java b/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/data_offers/DataOfferRecordUpdater.java index 955c05271..c09a3d0b8 100644 --- a/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/data_offers/DataOfferRecordUpdater.java +++ b/extensions/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/data_offers/DataOfferRecordUpdater.java @@ -16,11 +16,11 @@ import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedDataOffer; import de.sovity.edc.ext.catalog.crawler.crawling.writing.utils.ChangeTracker; -import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorQueries; import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; import de.sovity.edc.ext.catalog.crawler.dao.utils.JsonbUtils; import de.sovity.edc.ext.catalog.crawler.db.jooq.tables.records.DataOfferRecord; import de.sovity.edc.ext.catalog.crawler.utils.JsonUtils2; +import de.sovity.edc.ext.wrapper.api.common.mappers.asset.utils.ShortDescriptionBuilder; import lombok.RequiredArgsConstructor; import org.jooq.JSONB; @@ -36,8 +36,7 @@ */ @RequiredArgsConstructor public class DataOfferRecordUpdater { - - private final ConnectorQueries connectorQueries; + private final ShortDescriptionBuilder shortDescriptionBuilder; /** * Create a new {@link DataOfferRecord}. @@ -84,9 +83,15 @@ public boolean updateDataOffer( ); changes.setIfChanged( - blankIfNull(record.getDescription()), - blankIfNull(asset.getDescription()), - record::setDescription + blankIfNull(record.getDescriptionNoMarkdown()), + shortDescriptionBuilder.extractMarkdownText(blankIfNull(asset.getDescription())), + record::setDescriptionNoMarkdown + ); + + changes.setIfChanged( + blankIfNull(record.getShortDescriptionNoMarkdown()), + blankIfNull(asset.getDescriptionShortText()), + record::setShortDescriptionNoMarkdown ); changes.setIfChanged( diff --git a/extensions/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/CrawlerTestDb.java b/extensions/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/CrawlerTestDb.java index 7e7026048..a4057c57b 100644 --- a/extensions/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/CrawlerTestDb.java +++ b/extensions/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/CrawlerTestDb.java @@ -17,7 +17,6 @@ public class CrawlerTestDb implements BeforeAllCallback, AfterAllCallback { private final TestDatabaseViaTestcontainers db = new TestDatabaseViaTestcontainers(); - private HikariDataSource dataSource = null; private DslContextFactory dslContextFactory = null; diff --git a/extensions/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorSuccessWriterTest.java b/extensions/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorSuccessWriterTest.java new file mode 100644 index 000000000..4e7d6a603 --- /dev/null +++ b/extensions/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorSuccessWriterTest.java @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.writing; + +import de.sovity.edc.ext.catalog.crawler.CrawlerTestDb; +import de.sovity.edc.ext.catalog.crawler.TestData; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedCatalog; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedContractOffer; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedDataOffer; +import de.sovity.edc.ext.catalog.crawler.db.jooq.Tables; +import de.sovity.edc.ext.catalog.crawler.db.jooq.enums.ConnectorContractOffersExceeded; +import de.sovity.edc.ext.catalog.crawler.db.jooq.enums.ConnectorDataOffersExceeded; +import de.sovity.edc.ext.catalog.crawler.db.jooq.enums.ConnectorOnlineStatus; +import de.sovity.edc.ext.wrapper.api.common.model.UiAsset; +import org.assertj.core.data.TemporalUnitLessThanOffset; +import org.jooq.impl.DSL; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; + +import java.time.OffsetDateTime; +import java.time.temporal.ChronoUnit; +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.when; + +class ConnectorSuccessWriterTest { + @RegisterExtension + private static final CrawlerTestDb TEST_DATABASE = new CrawlerTestDb(); + + ConnectorUpdateSuccessWriter connectorUpdateSuccessWriter; + + @BeforeEach + void setup() { + var container = new DataOfferWriterTestDydi(); + connectorUpdateSuccessWriter = container.getConnectorUpdateSuccessWriter(); + when(container.getCrawlerConfig().getMaxContractOffersPerDataOffer()).thenReturn(1); + when(container.getCrawlerConfig().getMaxDataOffersPerConnector()).thenReturn(1); + } + + @Test + void testDataOfferWriter_fullSingleUpdate() { + TEST_DATABASE.testTransaction(dsl -> { + // arrange + var connectorRef = TestData.connectorRef; + TestData.insertConnector(dsl, connectorRef, unused -> { + }); + var uiAsset = UiAsset.builder() + .assetId("assetId") + .title("title") + .description("# Description\n\n**with Markdown**") + .descriptionShortText("descriptionShortText") + .dataCategory("dataCategory") + .dataSubcategory("dataSubCategory") + .dataModel("dataModel") + .transportMode("transportMode") + .geoReferenceMethod("geoReferenceMethod") + .keywords(List.of("a", "b")) + .build(); + var fetchedContractOffer = FetchedContractOffer.builder() + .contractOfferId("contractOfferId") + .uiPolicyJson("\"test-policy\"") + .build(); + var fetchedDataOffer = FetchedDataOffer.builder() + .assetId("assetId") + .uiAsset(uiAsset) + .uiAssetJson("\"test\"") + .contractOffers(List.of(fetchedContractOffer)) + .build(); + var fetchedCatalog = FetchedCatalog.builder() + .connectorRef(connectorRef) + .dataOffers(List.of(fetchedDataOffer)) + .build(); + + // act + connectorUpdateSuccessWriter.handleConnectorOnline( + dsl, + connectorRef, + dsl.fetchOne( + Tables.CONNECTOR, + Tables.CONNECTOR.CONNECTOR_ID.eq(connectorRef.getConnectorId()) + ), + fetchedCatalog + ); + + // assert + var connector = dsl.fetchOne( + Tables.CONNECTOR, + Tables.CONNECTOR.CONNECTOR_ID.eq(connectorRef.getConnectorId()) + ); + var dataOffer = dsl.fetchOne( + Tables.DATA_OFFER, + DSL.and( + Tables.DATA_OFFER.CONNECTOR_ID.eq(connectorRef.getConnectorId()), + Tables.DATA_OFFER.ASSET_ID.eq("assetId") + ) + ); + var contractOffer = dsl.fetchOne( + Tables.CONTRACT_OFFER, + DSL.and( + Tables.CONTRACT_OFFER.CONNECTOR_ID.eq(connectorRef.getConnectorId()), + Tables.CONTRACT_OFFER.ASSET_ID.eq("assetId"), + Tables.CONTRACT_OFFER.CONTRACT_OFFER_ID.eq("contractOfferId") + ) + ); + + var now = OffsetDateTime.now(); + var minuteAccuracy = new TemporalUnitLessThanOffset(1, ChronoUnit.MINUTES); + assertThat(connector).isNotNull(); + assertThat(connector.getOnlineStatus()).isEqualTo(ConnectorOnlineStatus.ONLINE); + assertThat(connector.getLastRefreshAttemptAt()).isCloseTo(now, minuteAccuracy); + assertThat(connector.getLastSuccessfulRefreshAt()).isCloseTo(now, minuteAccuracy); + assertThat(connector.getDataOffersExceeded()).isEqualTo(ConnectorDataOffersExceeded.OK); + assertThat(connector.getContractOffersExceeded()).isEqualTo(ConnectorContractOffersExceeded.OK); + + assertThat(dataOffer).isNotNull(); + assertThat(dataOffer.getAssetTitle()).isEqualTo("title"); + assertThat(dataOffer.getDescriptionNoMarkdown()).isEqualTo("Description with Markdown"); + assertThat(dataOffer.getShortDescriptionNoMarkdown()).isEqualTo("descriptionShortText"); + assertThat(dataOffer.getDataCategory()).isEqualTo("dataCategory"); + assertThat(dataOffer.getDataSubcategory()).isEqualTo("dataSubCategory"); + assertThat(dataOffer.getDataModel()).isEqualTo("dataModel"); + assertThat(dataOffer.getTransportMode()).isEqualTo("transportMode"); + assertThat(dataOffer.getGeoReferenceMethod()).isEqualTo("geoReferenceMethod"); + assertThat(dataOffer.getKeywords()).containsExactly("a", "b"); + assertThat(dataOffer.getKeywordsCommaJoined()).isEqualTo("a, b"); + assertThat(dataOffer.getUiAssetJson().data()).isEqualTo("\"test\""); + + assertThat(contractOffer).isNotNull(); + assertThat(contractOffer.getUiPolicyJson().data()).isEqualTo("\"test-policy\""); + }); + } +} diff --git a/extensions/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferWriterTestDydi.java b/extensions/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferWriterTestDydi.java index fb39bcbec..a46cf1dc4 100644 --- a/extensions/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferWriterTestDydi.java +++ b/extensions/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferWriterTestDydi.java @@ -14,6 +14,7 @@ package de.sovity.edc.ext.catalog.crawler.crawling.writing; +import de.sovity.edc.ext.catalog.crawler.crawling.logging.CrawlerEventLogger; import de.sovity.edc.ext.catalog.crawler.dao.CatalogPatchApplier; import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorQueries; import de.sovity.edc.ext.catalog.crawler.dao.contract_offers.ContractOfferQueries; @@ -21,6 +22,7 @@ import de.sovity.edc.ext.catalog.crawler.dao.data_offers.DataOfferQueries; import de.sovity.edc.ext.catalog.crawler.dao.data_offers.DataOfferRecordUpdater; import de.sovity.edc.ext.catalog.crawler.orchestration.config.CrawlerConfig; +import de.sovity.edc.ext.wrapper.api.common.mappers.asset.utils.ShortDescriptionBuilder; import lombok.Value; import org.eclipse.edc.spi.system.configuration.Config; @@ -34,9 +36,8 @@ class DataOfferWriterTestDydi { ContractOfferQueries contractOfferQueries = new ContractOfferQueries(); ContractOfferRecordUpdater contractOfferRecordUpdater = new ContractOfferRecordUpdater(); ConnectorQueries connectorQueries = new ConnectorQueries(crawlerConfig); - DataOfferRecordUpdater dataOfferRecordUpdater = new DataOfferRecordUpdater( - connectorQueries - ); + ShortDescriptionBuilder shortDescriptionBuilder = new ShortDescriptionBuilder(); + DataOfferRecordUpdater dataOfferRecordUpdater = new DataOfferRecordUpdater(shortDescriptionBuilder); CatalogPatchBuilder catalogPatchBuilder = new CatalogPatchBuilder( contractOfferQueries, dataOfferQueries, @@ -45,4 +46,16 @@ class DataOfferWriterTestDydi { ); CatalogPatchApplier catalogPatchApplier = new CatalogPatchApplier(); ConnectorUpdateCatalogWriter connectorUpdateCatalogWriter = new ConnectorUpdateCatalogWriter(catalogPatchBuilder, catalogPatchApplier); + + // for the ConnectorUpdateSuccessWriterTest + CrawlerEventLogger crawlerEventLogger = new CrawlerEventLogger(); + DataOfferLimitsEnforcer dataOfferLimitsEnforcer = new DataOfferLimitsEnforcer( + crawlerConfig, + crawlerEventLogger + ); + ConnectorUpdateSuccessWriter connectorUpdateSuccessWriter = new ConnectorUpdateSuccessWriter( + crawlerEventLogger, + connectorUpdateCatalogWriter, + dataOfferLimitsEnforcer + ); } diff --git a/extensions/wrapper/wrapper-common-mappers/src/main/java/de/sovity/edc/ext/wrapper/api/common/mappers/asset/utils/ShortDescriptionBuilder.java b/extensions/wrapper/wrapper-common-mappers/src/main/java/de/sovity/edc/ext/wrapper/api/common/mappers/asset/utils/ShortDescriptionBuilder.java index 0d85792dd..1f59e9960 100644 --- a/extensions/wrapper/wrapper-common-mappers/src/main/java/de/sovity/edc/ext/wrapper/api/common/mappers/asset/utils/ShortDescriptionBuilder.java +++ b/extensions/wrapper/wrapper-common-mappers/src/main/java/de/sovity/edc/ext/wrapper/api/common/mappers/asset/utils/ShortDescriptionBuilder.java @@ -25,11 +25,11 @@ public String buildShortDescription(String descriptionMarkdown) { return null; } - var text = extractText(descriptionMarkdown); + var text = extractMarkdownText(descriptionMarkdown); return abbreviate(text, 300); } - String extractText(String markdown) { + public String extractMarkdownText(String markdown) { var options = new MutableDataSet(); var parser = Parser.builder(options).build(); var renderer = HtmlRenderer.builder(options).build();