Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add namespace filter option in config file #2688

Merged
merged 11 commits into from
Dec 18, 2023
11 changes: 11 additions & 0 deletions api/src/main/java/marquez/MarquezApp.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import marquez.api.filter.JobRedirectFilter;
import marquez.api.filter.exclusions.ExclusionsConfig;
import marquez.api.filter.exclusions.ExclusionsFilter;
import marquez.cli.DbMigrationCommand;
import marquez.cli.DbRetentionCommand;
import marquez.cli.MetadataCommand;
Expand Down Expand Up @@ -139,6 +141,15 @@ public void run(@NonNull MarquezConfig config, @NonNull Environment env) {
// Add job to apply retention policy to database.
env.lifecycle().manage(new DbRetentionJob(jdbi, config.getDbRetention()));
}

// set namespaceFilter
if (config.hasExcludingPatterns()) {
ExclusionsConfig exclusions = config.getExclude();
if (exclusions.namespaces.onRead) {
ExclusionsFilter.setNamespacesReadFilter(exclusions.namespaces.patterns);
}
;
}
}

private boolean isSentryEnabled(MarquezConfig config) {
Expand Down
10 changes: 10 additions & 0 deletions api/src/main/java/marquez/MarquezConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import marquez.api.filter.exclusions.ExclusionsConfig;
import marquez.db.FlywayFactory;
import marquez.graphql.GraphqlConfig;
import marquez.jobs.DbRetentionConfig;
Expand Down Expand Up @@ -48,8 +49,17 @@ public class MarquezConfig extends Configuration {
@JsonProperty("dbRetention")
private DbRetentionConfig dbRetention; // OPTIONAL

@Getter
@JsonProperty("exclude")
private ExclusionsConfig exclude; // OPTIONAL

/** Returns {@code true} if a data retention policy has been configured. */
public boolean hasDbRetentionPolicy() {
return (dbRetention != null);
}

/** Returns {@code true} if an exclude pattern has been configured. */
public boolean hasExcludingPatterns() {
return (exclude != null);
}
}
13 changes: 11 additions & 2 deletions api/src/main/java/marquez/api/NamespaceResource.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import lombok.NonNull;
import lombok.Value;
import marquez.api.exceptions.NamespaceNotFoundException;
import marquez.api.filter.exclusions.ExclusionsFilter;
import marquez.common.models.NamespaceName;
import marquez.service.ServiceFactory;
import marquez.service.models.Namespace;
Expand Down Expand Up @@ -74,8 +75,16 @@ public Response get(@PathParam("namespace") NamespaceName name) {
public Response list(
@QueryParam("limit") @DefaultValue("100") @Min(value = 0) int limit,
@QueryParam("offset") @DefaultValue("0") @Min(value = 0) int offset) {
final List<Namespace> namespaces = namespaceService.findAll(limit, offset);
return Response.ok(new Namespaces(namespaces)).build();
final String namespaceFilter = ExclusionsFilter.getNamespacesReadFilter();
Copy link
Member

@wslulciuc wslulciuc Dec 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great work! To clean up the code, I suggest (FYI, these are only suggestions and feel free to modify as needed):

  1. Define class Exclusions with a cleaner interface defined in pkg marquez.exclusions, or marquez.common (see below) in place of ExclusionsFilter
  2. Use lambdas to call NamespaceService.findAll() or NamespaceService.findAllWith()

Exclusions.java

package marquez.common;

import com.google.common.collect.ClassToInstanceMap;
import com.google.common.collect.MutableClassToInstanceMap;
import javax.annotation.Nullable;
import lombok.NonNull;

public final class Exclusions {
  private Exclusions() {}

  private static final ClassToInstanceMap<Object> EXCLUSIONS = MutableClassToInstanceMap.create();
 

  public static void use(@NonNull ExclusionsConfig config) {
    // Build exclusions map using config.
    EXCLUSIONS.put(NamespaceExclusion.class, new NamespaceExclusion(...));
  }

  public static NamespaceExclusion namespaces() {
    return EXCLUSIONS.getInstance(NamespaceExclusion.class);
  }
 
  // To avoid duplicating the pattern (onRead, onWrite) we can also have a 
  // ON_READ_EXCLUSIONS and ON_WRITE_EXCLUSIONS maps;
  // therefore the contract would just be NamespaceExclusion(@NonNull String pattern);
  // but, 'Exclusions.namespaces().onRead()' is more concise.
  public record NamespaceExclusion(@Nullable String onRead, @Nullable String onWrite) {}
}

Then, you can chain your calls:

NamespaceResource.list()

final List<Namespace> namespaces =
  Optional.ofNullable(Exclusions.namespaces().onRead())
    .map(exclusion -> namespaceService.findAllWithExclusion(exclusion, limit, offset))
    .orElseGet(() -> namespaceService.findAll(limit, offset));

return Response.ok(new Namespaces(namespaces)).build();

Copy link
Member

@wslulciuc wslulciuc Dec 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@yanlibert I'm wondering, as we work through the Exclusions code, would it make sense to just have:

exclude:
  namespaces:
    onRead:
      enabled: boolean                      # (default: true)
      pattern: "<pattern0>|<pattern1>|..."
    onWrite: "<pattern0>|<pattern1>|..."
      enabled: boolean                      # (default: true)
      pattern: "<pattern0>|<pattern1>|..."

where onRead and onWrite can be different (or equal). This would give greater control and flexibility over what to exclude 😅

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's a great suggestion, I refactored the code accordingly

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@wslulciuc I also think option 2 where pattern is a string of regex is simpler, and more straightforward to implement as I'm not a java dev 😄
I increased the coverage and refactored the code as suggested by implementing a cleaner Exclusions interface.

final List<Namespace> allNamespaces = namespaceService.findAll(limit, offset);

if (namespaceFilter != null) {
final List<Namespace> FilterNamespaces =
namespaceService.findAllFilter(namespaceFilter, limit, offset);
return Response.ok(new Namespaces(FilterNamespaces)).build();
} else {
return Response.ok(new Namespaces(allNamespaces)).build();
}
}

@Timed
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package marquez.api.filter.exclusions;

import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Getter;

public class ExclusionsConfig {
@Getter @JsonProperty public NamespaceExclusion namespaces;

public static class NamespaceExclusion {
@Getter @JsonProperty public boolean onRead;
@Getter @JsonProperty public boolean onWrite;
@Getter @JsonProperty public String patterns;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a single pattern or a list of patterns separated in some way?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pawel-big-lebowski yes, sorry about that, I forgot to include the proper documentation. I added a section to the FAQ to explain the usage, I'd be happy to move it to another place that is more suitable if needed

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have 2 options:

  1. Define patterns as an array of patterns (i.e. Set<String> patterns)
  2. or, simply define patterns as a string with as REGEXP (which in that case, we can use String pattern)

I think option 1 might less error prone and more readable (though might make adding an exclusion filter in SQL tricky). For example, in marquez.yml:

exclude:
  namespaces:
     onRead: boolean
     onWrite: boolean
     patterns: [<pattern0>, <pattern1>,...]

while option 2 would be:

exclude:
  namespaces:
     onRead: boolean
     onWrite: boolean
     pattern: "<pattern0>|<pattern1>|..."

I think either option works and perhaps we just rename patterns to pattern for clarity and go with option 2 as it's the simpler option (both for configuration and implementation).

}
;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package marquez.api.filter.exclusions;

public class ExclusionsFilter {
private static String namespacesFilter;

public static void setNamespacesReadFilter(String filter) {
namespacesFilter = filter;
}

public static String getNamespacesReadFilter() {
return namespacesFilter;
}
}
4 changes: 4 additions & 0 deletions api/src/main/java/marquez/db/NamespaceDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ default Namespace upsertNamespaceMeta(
@SqlQuery("SELECT * FROM namespaces ORDER BY name LIMIT :limit OFFSET :offset")
List<Namespace> findAll(int limit, int offset);

@SqlQuery(
"SELECT * FROM namespaces WHERE name !~ :excluded ORDER BY name LIMIT :limit OFFSET :offset")
List<Namespace> findAllFilter(String excluded, int limit, int offset);
yanlibert marked this conversation as resolved.
Show resolved Hide resolved

@SqlQuery("UPDATE namespaces SET is_hidden=false WHERE name = :name RETURNING *")
NamespaceRow undelete(String name);

Expand Down
97 changes: 97 additions & 0 deletions api/src/test/java/marquez/api/NamespaceResourceTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package marquez.api;

import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.doCallRealMethod;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;

import java.util.List;
import javax.ws.rs.core.Response;
import marquez.api.NamespaceResource.Namespaces;
import marquez.api.filter.exclusions.ExclusionsFilter;
import marquez.common.models.NamespaceName;
import marquez.common.models.OwnerName;
import marquez.db.BaseDao;
import marquez.db.NamespaceDao;
import marquez.jdbi.MarquezJdbiExternalPostgresExtension;
import marquez.service.NamespaceService;
import marquez.service.ServiceFactory;
import marquez.service.models.Namespace;
import marquez.service.models.NamespaceMeta;
import org.jdbi.v3.core.Jdbi;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;

@ExtendWith(MarquezJdbiExternalPostgresExtension.class)
public class NamespaceResourceTest {

@Mock private ServiceFactory serviceFactory;

@Mock private BaseDao baseDao;

private static NamespaceDao namespaceDao;
private NamespaceService namespaceService;
private NamespaceResource namespaceResource;

@BeforeEach
public void setUp(Jdbi jdbi) {
MockitoAnnotations.openMocks(this);

namespaceDao = jdbi.onDemand(NamespaceDao.class);
when(baseDao.createNamespaceDao()).thenReturn(namespaceDao);
namespaceService = new NamespaceService(baseDao);

when(serviceFactory.getNamespaceService()).thenReturn(namespaceService);
namespaceResource = new NamespaceResource(serviceFactory);
}

@Test
void testFindAllFilter() {
var namespaceName1 = NamespaceName.of("postgres://localhost:5432");
var namespaceMeta1 = new NamespaceMeta(new OwnerName("marquez"), null);
namespaceDao.upsertNamespaceMeta(namespaceName1, namespaceMeta1);

var namespaceName2 = NamespaceName.of("excluded_namespace");
var namespaceMeta2 = new NamespaceMeta(new OwnerName("yannick"), null);
namespaceDao.upsertNamespaceMeta(namespaceName2, namespaceMeta2);

List<Namespace> namespaces = namespaceDao.findAllFilter("excluded.*", 10, 0);

// Assert that the namespaces list does not contain the excluded namespace
assertFalse(
namespaces.stream().anyMatch(namespace -> namespace.getName().equals(namespaceName2)));
}

@Test
public void testListWithFilter() {
String filter = "excluded_.*";
ExclusionsFilter.setNamespacesReadFilter(filter);

NamespaceName namespaceName = NamespaceName.of("excluded_namespace");
OwnerName owner = new OwnerName("yannick");
NamespaceMeta namespaceMeta = new NamespaceMeta(owner, "description");

namespaceDao.upsertNamespaceMeta(namespaceName, namespaceMeta);

NamespaceService namespaceServiceSpy = spy(namespaceService);
doCallRealMethod().when(namespaceServiceSpy).findAllFilter(eq(filter), anyInt(), anyInt());

Response response = namespaceResource.list(10, 0);
Namespaces namespaces = (Namespaces) response.getEntity();

// Check if the returned namespaces contain a namespace with the name
// "excluded_namespace"
boolean containsExcludedNamespace =
namespaces.getValue().stream()
.anyMatch(namespace -> namespace.getName().getValue().equals("excluded_namespace"));

// Assert that the returned namespaces do not contain a namespace with the name
// "excluded_namespace"
assertFalse(containsExcludedNamespace);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package marquez.api.filter.exclusions;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;

import marquez.api.filter.exclusions.ExclusionsConfig.NamespaceExclusion;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

class ExclusionsConfigTest {

private ExclusionsConfig exclusionsConfig;
private NamespaceExclusion namespaceExclusion;

@BeforeEach
void setUp() {
exclusionsConfig = new ExclusionsConfig();
namespaceExclusion = new NamespaceExclusion();
}

@Test
void testNamespaceExclusionOnRead() {
namespaceExclusion.onRead = true;
assertEquals(true, namespaceExclusion.isOnRead());
}

@Test
void testNamespaceExclusionOnWrite() {
namespaceExclusion.onWrite = true;
assertEquals(true, namespaceExclusion.isOnWrite());
}

@Test
void testNamespaceExclusionPatterns() {
String patterns = "test-pattern";
namespaceExclusion.patterns = patterns;
assertEquals(patterns, namespaceExclusion.getPatterns());
}

@Test
void testExclusionsConfigNamespaces() {
exclusionsConfig.namespaces = namespaceExclusion;
assertNotNull(exclusionsConfig.getNamespaces());
assertEquals(namespaceExclusion, exclusionsConfig.getNamespaces());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package marquez.api.filter.exclusions;

import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

public class ExclusionsFilterTest {

private static final String TEST_FILTER = "test-filter";

@BeforeEach
public void setUp() {
ExclusionsFilter.setNamespacesReadFilter(null);
}

@Test
public void testSetAndGetNamespacesReadFilter() {
ExclusionsFilter.setNamespacesReadFilter(TEST_FILTER);
String result = ExclusionsFilter.getNamespacesReadFilter();
assertEquals(TEST_FILTER, result);
}
}