diff --git a/api/src/main/java/marquez/MarquezApp.java b/api/src/main/java/marquez/MarquezApp.java index 386a14cf37..0bc95bd4ae 100644 --- a/api/src/main/java/marquez/MarquezApp.java +++ b/api/src/main/java/marquez/MarquezApp.java @@ -26,6 +26,8 @@ import lombok.NonNull; import lombok.extern.slf4j.Slf4j; import marquez.api.filter.JobRedirectFilter; +import marquez.api.filter.exclusions.Exclusions; +import marquez.api.filter.exclusions.ExclusionsConfig; import marquez.cli.DbMigrationCommand; import marquez.cli.DbRetentionCommand; import marquez.cli.MetadataCommand; @@ -139,6 +141,10 @@ public void run(@NonNull MarquezConfig config, @NonNull Environment env) { // Add job to apply retention policy to database. env.lifecycle().manage(new DbRetentionJob(jdbi, config.getDbRetention())); } + + // set namespaceFilter + ExclusionsConfig exclusions = config.getExclude(); + Exclusions.use(exclusions); } private boolean isSentryEnabled(MarquezConfig config) { diff --git a/api/src/main/java/marquez/MarquezConfig.java b/api/src/main/java/marquez/MarquezConfig.java index 97263a64be..7336b06c22 100644 --- a/api/src/main/java/marquez/MarquezConfig.java +++ b/api/src/main/java/marquez/MarquezConfig.java @@ -12,6 +12,7 @@ import lombok.Getter; import lombok.NoArgsConstructor; import lombok.Setter; +import marquez.api.filter.exclusions.ExclusionsConfig; import marquez.db.FlywayFactory; import marquez.graphql.GraphqlConfig; import marquez.jobs.DbRetentionConfig; @@ -48,6 +49,10 @@ public class MarquezConfig extends Configuration { @JsonProperty("dbRetention") private DbRetentionConfig dbRetention; // OPTIONAL + @Getter + @JsonProperty("exclude") + private ExclusionsConfig exclude = new ExclusionsConfig(); + /** Returns {@code true} if a data retention policy has been configured. */ public boolean hasDbRetentionPolicy() { return (dbRetention != null); diff --git a/api/src/main/java/marquez/api/NamespaceResource.java b/api/src/main/java/marquez/api/NamespaceResource.java index df9dcdc0f3..fba07c1ce2 100644 --- a/api/src/main/java/marquez/api/NamespaceResource.java +++ b/api/src/main/java/marquez/api/NamespaceResource.java @@ -12,6 +12,7 @@ import com.codahale.metrics.annotation.Timed; import com.fasterxml.jackson.annotation.JsonProperty; import java.util.List; +import java.util.Optional; import javax.validation.Valid; import javax.validation.constraints.Min; import javax.ws.rs.Consumes; @@ -27,6 +28,8 @@ import lombok.NonNull; import lombok.Value; import marquez.api.exceptions.NamespaceNotFoundException; +import marquez.api.filter.exclusions.Exclusions; +import marquez.api.filter.exclusions.ExclusionsConfig; import marquez.common.models.NamespaceName; import marquez.service.ServiceFactory; import marquez.service.models.Namespace; @@ -74,7 +77,15 @@ public Response get(@PathParam("namespace") NamespaceName name) { public Response list( @QueryParam("limit") @DefaultValue("100") @Min(value = 0) int limit, @QueryParam("offset") @DefaultValue("0") @Min(value = 0) int offset) { - final List namespaces = namespaceService.findAll(limit, offset); + + final List namespaces = + Optional.ofNullable(Exclusions.namespaces()) + .map(ExclusionsConfig.NamespaceExclusions::getOnRead) + .filter(ExclusionsConfig.OnRead::isEnabled) + .map(ExclusionsConfig.OnRead::getPattern) + .map(pattern -> namespaceService.findAllWithExclusion(pattern, limit, offset)) + .orElseGet(() -> namespaceService.findAll(limit, offset)); + return Response.ok(new Namespaces(namespaces)).build(); } diff --git a/api/src/main/java/marquez/api/filter/exclusions/Exclusions.java b/api/src/main/java/marquez/api/filter/exclusions/Exclusions.java new file mode 100644 index 0000000000..9e8a324f1c --- /dev/null +++ b/api/src/main/java/marquez/api/filter/exclusions/Exclusions.java @@ -0,0 +1,20 @@ +package marquez.api.filter.exclusions; + +import com.google.common.collect.ClassToInstanceMap; +import com.google.common.collect.MutableClassToInstanceMap; +import lombok.NonNull; +import marquez.api.filter.exclusions.ExclusionsConfig.NamespaceExclusions; + +public final class Exclusions { + private Exclusions() {} + + private static final ClassToInstanceMap EXCLUSIONS = MutableClassToInstanceMap.create(); + + public static void use(@NonNull ExclusionsConfig config) { + EXCLUSIONS.put(ExclusionsConfig.NamespaceExclusions.class, config.getNamespaces()); + } + + public static NamespaceExclusions namespaces() { + return EXCLUSIONS.getInstance(NamespaceExclusions.class); + } +} diff --git a/api/src/main/java/marquez/api/filter/exclusions/ExclusionsConfig.java b/api/src/main/java/marquez/api/filter/exclusions/ExclusionsConfig.java new file mode 100644 index 0000000000..e2db7ef591 --- /dev/null +++ b/api/src/main/java/marquez/api/filter/exclusions/ExclusionsConfig.java @@ -0,0 +1,38 @@ +package marquez.api.filter.exclusions; + +import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.Getter; + +public class ExclusionsConfig { + @Getter @JsonProperty public NamespaceExclusions namespaces; + + public static class NamespaceExclusions { + @Getter + @JsonProperty("onRead") + public OnRead onRead; + + @Getter + @JsonProperty("onWrite") + public OnWrite onWrite; + } + + public static class OnRead { + @Getter + @JsonProperty("enabled") + public boolean enabled; + + @Getter + @JsonProperty("pattern") + public String pattern; + } + + public static class OnWrite { + @Getter + @JsonProperty("enabled") + public boolean enabled; + + @Getter + @JsonProperty("pattern") + public String pattern; + } +} diff --git a/api/src/main/java/marquez/db/NamespaceDao.java b/api/src/main/java/marquez/db/NamespaceDao.java index adc88f8f39..6241f0df0d 100644 --- a/api/src/main/java/marquez/db/NamespaceDao.java +++ b/api/src/main/java/marquez/db/NamespaceDao.java @@ -78,6 +78,10 @@ default Namespace upsertNamespaceMeta( @SqlQuery("SELECT * FROM namespaces ORDER BY name LIMIT :limit OFFSET :offset") List findAll(int limit, int offset); + @SqlQuery( + "SELECT * FROM namespaces WHERE name !~ :excluded ORDER BY name LIMIT :limit OFFSET :offset") + List findAllWithExclusion(String excluded, int limit, int offset); + @SqlQuery("UPDATE namespaces SET is_hidden=false WHERE name = :name RETURNING *") NamespaceRow undelete(String name); diff --git a/api/src/test/java/marquez/api/NamespaceResourceTest.java b/api/src/test/java/marquez/api/NamespaceResourceTest.java new file mode 100644 index 0000000000..8dcf7aeb4f --- /dev/null +++ b/api/src/test/java/marquez/api/NamespaceResourceTest.java @@ -0,0 +1,109 @@ +package marquez.api; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doCallRealMethod; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; + +import java.util.List; +import javax.ws.rs.core.Response; +import marquez.api.NamespaceResource.Namespaces; +import marquez.api.filter.exclusions.Exclusions; +import marquez.api.filter.exclusions.ExclusionsConfig; +import marquez.common.models.NamespaceName; +import marquez.common.models.OwnerName; +import marquez.db.BaseDao; +import marquez.db.NamespaceDao; +import marquez.jdbi.MarquezJdbiExternalPostgresExtension; +import marquez.service.NamespaceService; +import marquez.service.ServiceFactory; +import marquez.service.models.Namespace; +import marquez.service.models.NamespaceMeta; +import org.jdbi.v3.core.Jdbi; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +@ExtendWith(MarquezJdbiExternalPostgresExtension.class) +public class NamespaceResourceTest { + + @Mock private ServiceFactory serviceFactory; + + @Mock private BaseDao baseDao; + + private static NamespaceDao namespaceDao; + private NamespaceService namespaceService; + private NamespaceResource namespaceResource; + + @BeforeEach + public void setUp(Jdbi jdbi) { + MockitoAnnotations.openMocks(this); + + namespaceDao = jdbi.onDemand(NamespaceDao.class); + when(baseDao.createNamespaceDao()).thenReturn(namespaceDao); + namespaceService = new NamespaceService(baseDao); + + when(serviceFactory.getNamespaceService()).thenReturn(namespaceService); + namespaceResource = new NamespaceResource(serviceFactory); + } + + @Test + void testFindAllFilter() { + var namespaceName1 = NamespaceName.of("postgres://localhost:5432"); + var namespaceMeta1 = new NamespaceMeta(new OwnerName("marquez"), null); + namespaceDao.upsertNamespaceMeta(namespaceName1, namespaceMeta1); + + var namespaceName2 = NamespaceName.of("excluded_namespace"); + var namespaceMeta2 = new NamespaceMeta(new OwnerName("yannick"), null); + namespaceDao.upsertNamespaceMeta(namespaceName2, namespaceMeta2); + + List namespaces = namespaceDao.findAllWithExclusion("excluded.*", 10, 0); + + // Assert that the namespaces list does not contain the excluded namespace + assertFalse( + namespaces.stream().anyMatch(namespace -> namespace.getName().equals(namespaceName2))); + } + + @Test + public void testListWithFilter() { + String filter = "excluded_.*"; + ExclusionsConfig exclusionsConfig = new ExclusionsConfig(); + ExclusionsConfig.NamespaceExclusions namespaceExclusions = + new ExclusionsConfig.NamespaceExclusions(); + ExclusionsConfig.OnRead onRead = new ExclusionsConfig.OnRead(); + onRead.enabled = true; + onRead.pattern = filter; + namespaceExclusions.onRead = onRead; + + exclusionsConfig.namespaces = namespaceExclusions; + Exclusions.use(exclusionsConfig); + + NamespaceName namespaceName = NamespaceName.of("excluded_namespace"); + OwnerName owner = new OwnerName("yannick"); + NamespaceMeta namespaceMeta = new NamespaceMeta(owner, "description"); + + namespaceDao.upsertNamespaceMeta(namespaceName, namespaceMeta); + + NamespaceService namespaceServiceSpy = spy(namespaceService); + doCallRealMethod() + .when(namespaceServiceSpy) + .findAllWithExclusion(eq(filter), anyInt(), anyInt()); + + Response response = namespaceResource.list(10, 0); + Namespaces namespaces = (Namespaces) response.getEntity(); + + // Check if the returned namespaces contain a namespace with the name + // "excluded_namespace" + boolean containsExcludedNamespace = + namespaces.getValue().stream() + .anyMatch(namespace -> namespace.getName().getValue().equals("excluded_namespace")); + + // Assert that the returned namespaces do not contain a namespace with the name + // "excluded_namespace" + assertFalse(containsExcludedNamespace); + } +} diff --git a/api/src/test/java/marquez/api/filter/exclusions/ExclusionsConfigTest.java b/api/src/test/java/marquez/api/filter/exclusions/ExclusionsConfigTest.java new file mode 100644 index 0000000000..a9f72ed555 --- /dev/null +++ b/api/src/test/java/marquez/api/filter/exclusions/ExclusionsConfigTest.java @@ -0,0 +1,39 @@ +package marquez.api.filter.exclusions; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.junit.jupiter.api.Test; + +public class ExclusionsConfigTest { + @Test + public void testNamespaceExclusionsOnRead() { + ExclusionsConfig exclusionsConfig = new ExclusionsConfig(); + ExclusionsConfig.NamespaceExclusions namespaceExclusions = + new ExclusionsConfig.NamespaceExclusions(); + ExclusionsConfig.OnRead onRead = new ExclusionsConfig.OnRead(); + onRead.enabled = true; + onRead.pattern = "readPattern"; + namespaceExclusions.onRead = onRead; + + exclusionsConfig.namespaces = namespaceExclusions; + + assertEquals(true, exclusionsConfig.namespaces.onRead.enabled); + assertEquals("readPattern", exclusionsConfig.namespaces.onRead.pattern); + } + + @Test + public void testNamespaceExclusionsOnWrite() { + ExclusionsConfig exclusionsConfig = new ExclusionsConfig(); + ExclusionsConfig.NamespaceExclusions namespaceExclusions = + new ExclusionsConfig.NamespaceExclusions(); + ExclusionsConfig.OnWrite onWrite = new ExclusionsConfig.OnWrite(); + onWrite.enabled = false; + onWrite.pattern = "writePattern"; + namespaceExclusions.onWrite = onWrite; + + exclusionsConfig.namespaces = namespaceExclusions; + + assertEquals(false, exclusionsConfig.namespaces.onWrite.enabled); + assertEquals("writePattern", exclusionsConfig.namespaces.onWrite.pattern); + } +} diff --git a/docs/faq.md b/docs/faq.md index 52b1355400..f9d7f5cabd 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -89,3 +89,29 @@ java -jar marquez-api.jar db-retention \ --dry-run \ marquez.yml ``` + +### How do I filter namespaces using regex patterns in Marquez? + +In Marquez, you may find the need to exclude certain namespaces from being fetched. This can be particularly useful when you want to filter out namespaces based on specific patterns. To achieve this, you can use the `exclude` feature in the `marquez.yml` configuration file. + +Here's how you can set it up: + +```yaml +exclude: + namespaces: + onRead: + enabled: boolean + pattern: "||..." + onWrite: + enabled: boolean + pattern: "||..." +``` +In the above configuration: + +- `onRead.enabled: true` indicates that the exclusion should happen when reading namespaces. +- `pattern` is a string of regular expressions. Any namespace matching any of these patterns will be excluded. +Replace and with the actual regex patterns you want to use for filtering namespaces. You can add as many patterns as you need. + +This feature provides a flexible way to manage the namespaces that are read by Marquez, allowing you to fine-tune the list of namespaces that are presented in the UI. + +For the moment, the exclusion only works for filtering namespaces when Marquez is querying them from its database (onRead), but we plan to expand the same logic to databases and jobs not only on read, but also on write to prevent any unwanted data to be sent to the backend. \ No newline at end of file