Skip to content

Commit

Permalink
Add namespace filter option in config file (#2688)
Browse files Browse the repository at this point in the history
* feat: add namespace filter option in config

Signed-off-by: Yan <yannick.libert@gmail.com>
Signed-off-by: yanlibert <yannick.libert@gmail.com>

* doc: better comment

Signed-off-by: Yan <yannick.libert@gmail.com>
Signed-off-by: yanlibert <yannick.libert@gmail.com>

* feat: implement excludes for namespaces

Signed-off-by: yanlibert <yannick.libert@gmail.com>

* feat: relocate test to proper location

Signed-off-by: yanlibert <yannick.libert@gmail.com>

* feat: increased test coverage

Signed-off-by: yanlibert <yannick.libert@gmail.com>

* doc: add FAQ entry for filtering namespaces

Signed-off-by: yanlibert <yannick.libert@gmail.com>

* refacto: cleaner Exclusion interface and better yml usage

Signed-off-by: yanlibert <yannick.libert@gmail.com>

* feat: add Exclusion interface

Signed-off-by: yanlibert <yannick.libert@gmail.com>

---------

Signed-off-by: Yan <yannick.libert@gmail.com>
Signed-off-by: yanlibert <yannick.libert@gmail.com>
  • Loading branch information
yanlibert authored Dec 18, 2023
1 parent 0c4ff4d commit 83608bb
Show file tree
Hide file tree
Showing 9 changed files with 259 additions and 1 deletion.
6 changes: 6 additions & 0 deletions api/src/main/java/marquez/MarquezApp.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import marquez.api.filter.JobRedirectFilter;
import marquez.api.filter.exclusions.Exclusions;
import marquez.api.filter.exclusions.ExclusionsConfig;
import marquez.cli.DbMigrationCommand;
import marquez.cli.DbRetentionCommand;
import marquez.cli.MetadataCommand;
Expand Down Expand Up @@ -139,6 +141,10 @@ public void run(@NonNull MarquezConfig config, @NonNull Environment env) {
// Add job to apply retention policy to database.
env.lifecycle().manage(new DbRetentionJob(jdbi, config.getDbRetention()));
}

// set namespaceFilter
ExclusionsConfig exclusions = config.getExclude();
Exclusions.use(exclusions);
}

private boolean isSentryEnabled(MarquezConfig config) {
Expand Down
5 changes: 5 additions & 0 deletions api/src/main/java/marquez/MarquezConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import marquez.api.filter.exclusions.ExclusionsConfig;
import marquez.db.FlywayFactory;
import marquez.graphql.GraphqlConfig;
import marquez.jobs.DbRetentionConfig;
Expand Down Expand Up @@ -48,6 +49,10 @@ public class MarquezConfig extends Configuration {
@JsonProperty("dbRetention")
private DbRetentionConfig dbRetention; // OPTIONAL

@Getter
@JsonProperty("exclude")
private ExclusionsConfig exclude = new ExclusionsConfig();

/** Returns {@code true} if a data retention policy has been configured. */
public boolean hasDbRetentionPolicy() {
return (dbRetention != null);
Expand Down
13 changes: 12 additions & 1 deletion api/src/main/java/marquez/api/NamespaceResource.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import com.codahale.metrics.annotation.Timed;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.List;
import java.util.Optional;
import javax.validation.Valid;
import javax.validation.constraints.Min;
import javax.ws.rs.Consumes;
Expand All @@ -27,6 +28,8 @@
import lombok.NonNull;
import lombok.Value;
import marquez.api.exceptions.NamespaceNotFoundException;
import marquez.api.filter.exclusions.Exclusions;
import marquez.api.filter.exclusions.ExclusionsConfig;
import marquez.common.models.NamespaceName;
import marquez.service.ServiceFactory;
import marquez.service.models.Namespace;
Expand Down Expand Up @@ -74,7 +77,15 @@ public Response get(@PathParam("namespace") NamespaceName name) {
public Response list(
@QueryParam("limit") @DefaultValue("100") @Min(value = 0) int limit,
@QueryParam("offset") @DefaultValue("0") @Min(value = 0) int offset) {
final List<Namespace> namespaces = namespaceService.findAll(limit, offset);

final List<Namespace> namespaces =
Optional.ofNullable(Exclusions.namespaces())
.map(ExclusionsConfig.NamespaceExclusions::getOnRead)
.filter(ExclusionsConfig.OnRead::isEnabled)
.map(ExclusionsConfig.OnRead::getPattern)
.map(pattern -> namespaceService.findAllWithExclusion(pattern, limit, offset))
.orElseGet(() -> namespaceService.findAll(limit, offset));

return Response.ok(new Namespaces(namespaces)).build();
}

Expand Down
20 changes: 20 additions & 0 deletions api/src/main/java/marquez/api/filter/exclusions/Exclusions.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package marquez.api.filter.exclusions;

import com.google.common.collect.ClassToInstanceMap;
import com.google.common.collect.MutableClassToInstanceMap;
import lombok.NonNull;
import marquez.api.filter.exclusions.ExclusionsConfig.NamespaceExclusions;

public final class Exclusions {
private Exclusions() {}

private static final ClassToInstanceMap<Object> EXCLUSIONS = MutableClassToInstanceMap.create();

public static void use(@NonNull ExclusionsConfig config) {
EXCLUSIONS.put(ExclusionsConfig.NamespaceExclusions.class, config.getNamespaces());
}

public static NamespaceExclusions namespaces() {
return EXCLUSIONS.getInstance(NamespaceExclusions.class);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package marquez.api.filter.exclusions;

import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Getter;

public class ExclusionsConfig {
@Getter @JsonProperty public NamespaceExclusions namespaces;

public static class NamespaceExclusions {
@Getter
@JsonProperty("onRead")
public OnRead onRead;

@Getter
@JsonProperty("onWrite")
public OnWrite onWrite;
}

public static class OnRead {
@Getter
@JsonProperty("enabled")
public boolean enabled;

@Getter
@JsonProperty("pattern")
public String pattern;
}

public static class OnWrite {
@Getter
@JsonProperty("enabled")
public boolean enabled;

@Getter
@JsonProperty("pattern")
public String pattern;
}
}
4 changes: 4 additions & 0 deletions api/src/main/java/marquez/db/NamespaceDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ default Namespace upsertNamespaceMeta(
@SqlQuery("SELECT * FROM namespaces ORDER BY name LIMIT :limit OFFSET :offset")
List<Namespace> findAll(int limit, int offset);

@SqlQuery(
"SELECT * FROM namespaces WHERE name !~ :excluded ORDER BY name LIMIT :limit OFFSET :offset")
List<Namespace> findAllWithExclusion(String excluded, int limit, int offset);

@SqlQuery("UPDATE namespaces SET is_hidden=false WHERE name = :name RETURNING *")
NamespaceRow undelete(String name);

Expand Down
109 changes: 109 additions & 0 deletions api/src/test/java/marquez/api/NamespaceResourceTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package marquez.api;

import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.doCallRealMethod;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;

import java.util.List;
import javax.ws.rs.core.Response;
import marquez.api.NamespaceResource.Namespaces;
import marquez.api.filter.exclusions.Exclusions;
import marquez.api.filter.exclusions.ExclusionsConfig;
import marquez.common.models.NamespaceName;
import marquez.common.models.OwnerName;
import marquez.db.BaseDao;
import marquez.db.NamespaceDao;
import marquez.jdbi.MarquezJdbiExternalPostgresExtension;
import marquez.service.NamespaceService;
import marquez.service.ServiceFactory;
import marquez.service.models.Namespace;
import marquez.service.models.NamespaceMeta;
import org.jdbi.v3.core.Jdbi;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;

@ExtendWith(MarquezJdbiExternalPostgresExtension.class)
public class NamespaceResourceTest {

@Mock private ServiceFactory serviceFactory;

@Mock private BaseDao baseDao;

private static NamespaceDao namespaceDao;
private NamespaceService namespaceService;
private NamespaceResource namespaceResource;

@BeforeEach
public void setUp(Jdbi jdbi) {
MockitoAnnotations.openMocks(this);

namespaceDao = jdbi.onDemand(NamespaceDao.class);
when(baseDao.createNamespaceDao()).thenReturn(namespaceDao);
namespaceService = new NamespaceService(baseDao);

when(serviceFactory.getNamespaceService()).thenReturn(namespaceService);
namespaceResource = new NamespaceResource(serviceFactory);
}

@Test
void testFindAllFilter() {
var namespaceName1 = NamespaceName.of("postgres://localhost:5432");
var namespaceMeta1 = new NamespaceMeta(new OwnerName("marquez"), null);
namespaceDao.upsertNamespaceMeta(namespaceName1, namespaceMeta1);

var namespaceName2 = NamespaceName.of("excluded_namespace");
var namespaceMeta2 = new NamespaceMeta(new OwnerName("yannick"), null);
namespaceDao.upsertNamespaceMeta(namespaceName2, namespaceMeta2);

List<Namespace> namespaces = namespaceDao.findAllWithExclusion("excluded.*", 10, 0);

// Assert that the namespaces list does not contain the excluded namespace
assertFalse(
namespaces.stream().anyMatch(namespace -> namespace.getName().equals(namespaceName2)));
}

@Test
public void testListWithFilter() {
String filter = "excluded_.*";
ExclusionsConfig exclusionsConfig = new ExclusionsConfig();
ExclusionsConfig.NamespaceExclusions namespaceExclusions =
new ExclusionsConfig.NamespaceExclusions();
ExclusionsConfig.OnRead onRead = new ExclusionsConfig.OnRead();
onRead.enabled = true;
onRead.pattern = filter;
namespaceExclusions.onRead = onRead;

exclusionsConfig.namespaces = namespaceExclusions;
Exclusions.use(exclusionsConfig);

NamespaceName namespaceName = NamespaceName.of("excluded_namespace");
OwnerName owner = new OwnerName("yannick");
NamespaceMeta namespaceMeta = new NamespaceMeta(owner, "description");

namespaceDao.upsertNamespaceMeta(namespaceName, namespaceMeta);

NamespaceService namespaceServiceSpy = spy(namespaceService);
doCallRealMethod()
.when(namespaceServiceSpy)
.findAllWithExclusion(eq(filter), anyInt(), anyInt());

Response response = namespaceResource.list(10, 0);
Namespaces namespaces = (Namespaces) response.getEntity();

// Check if the returned namespaces contain a namespace with the name
// "excluded_namespace"
boolean containsExcludedNamespace =
namespaces.getValue().stream()
.anyMatch(namespace -> namespace.getName().getValue().equals("excluded_namespace"));

// Assert that the returned namespaces do not contain a namespace with the name
// "excluded_namespace"
assertFalse(containsExcludedNamespace);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package marquez.api.filter.exclusions;

import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.Test;

public class ExclusionsConfigTest {
@Test
public void testNamespaceExclusionsOnRead() {
ExclusionsConfig exclusionsConfig = new ExclusionsConfig();
ExclusionsConfig.NamespaceExclusions namespaceExclusions =
new ExclusionsConfig.NamespaceExclusions();
ExclusionsConfig.OnRead onRead = new ExclusionsConfig.OnRead();
onRead.enabled = true;
onRead.pattern = "readPattern";
namespaceExclusions.onRead = onRead;

exclusionsConfig.namespaces = namespaceExclusions;

assertEquals(true, exclusionsConfig.namespaces.onRead.enabled);
assertEquals("readPattern", exclusionsConfig.namespaces.onRead.pattern);
}

@Test
public void testNamespaceExclusionsOnWrite() {
ExclusionsConfig exclusionsConfig = new ExclusionsConfig();
ExclusionsConfig.NamespaceExclusions namespaceExclusions =
new ExclusionsConfig.NamespaceExclusions();
ExclusionsConfig.OnWrite onWrite = new ExclusionsConfig.OnWrite();
onWrite.enabled = false;
onWrite.pattern = "writePattern";
namespaceExclusions.onWrite = onWrite;

exclusionsConfig.namespaces = namespaceExclusions;

assertEquals(false, exclusionsConfig.namespaces.onWrite.enabled);
assertEquals("writePattern", exclusionsConfig.namespaces.onWrite.pattern);
}
}
26 changes: 26 additions & 0 deletions docs/faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,29 @@ java -jar marquez-api.jar db-retention \
--dry-run \
marquez.yml
```

### How do I filter namespaces using regex patterns in Marquez?

In Marquez, you may find the need to exclude certain namespaces from being fetched. This can be particularly useful when you want to filter out namespaces based on specific patterns. To achieve this, you can use the `exclude` feature in the `marquez.yml` configuration file.

Here's how you can set it up:

```yaml
exclude:
namespaces:
onRead:
enabled: boolean
pattern: "<pattern0>|<pattern1>|..."
onWrite:
enabled: boolean
pattern: "<pattern0>|<pattern1>|..."
```
In the above configuration:

- `onRead.enabled: true` indicates that the exclusion should happen when reading namespaces.
- `pattern` is a string of regular expressions. Any namespace matching any of these patterns will be excluded.
Replace <pattern0> and <pattern1> with the actual regex patterns you want to use for filtering namespaces. You can add as many patterns as you need.

This feature provides a flexible way to manage the namespaces that are read by Marquez, allowing you to fine-tune the list of namespaces that are presented in the UI.

For the moment, the exclusion only works for filtering namespaces when Marquez is querying them from its database (onRead), but we plan to expand the same logic to databases and jobs not only on read, but also on write to prevent any unwanted data to be sent to the backend.

0 comments on commit 83608bb

Please sign in to comment.