From c2fef6fe47939eba3925c42e188a8e2e332ea8fe Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 00:00:23 +0530 Subject: [PATCH 01/26] Fixes: https://cwe.mitre.org/data/definitions/338.html --- .../tests/cypress/cypress/e2e/mutations/dataset_ownership.js | 2 +- .../tests/cypress/cypress/e2e/mutations/ingestion_source.js | 2 +- .../tests/cypress/cypress/e2e/mutations/managing_secrets.js | 2 +- .../tests/cypress/cypress/e2e/settings/managing_groups.js | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js b/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js index 552c3d460ade9b..d8af3c6657ae02 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js @@ -1,4 +1,4 @@ -const test_id = Math.floor(Math.random() * 100000); +const test_id = crypto.getRandomValues(new Uint32Array(1))[0]; const username = `Example Name ${test_id}`; const email = `example${test_id}@example.com`; const password = "Example password"; diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js b/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js index 6c5dd778106448..20fa4b23ec83fd 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js @@ -1,5 +1,5 @@ -const number = Math.floor(Math.random() * 100000); +const number = crypto.getRandomValues(new Uint32Array(1))[0]; const accound_id = `account${number}`; const warehouse_id = `warehouse${number}`; const username = `user${number}`; diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js b/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js index 77fd63b9cae02f..81446f925947b9 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js @@ -1,4 +1,4 @@ -const number = Math.floor(Math.random() * 100000); +const number = crypto.getRandomValues(new Uint32Array(1))[0]; const accound_id = `account${number}`; const warehouse_id = `warehouse${number}`; const username = `user${number}`; diff --git a/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js b/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js index d9f69cd9a5ec42..3ccaa48774d147 100644 --- a/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js +++ b/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js @@ -1,4 +1,4 @@ -const test_id = Math.floor(Math.random() * 100000); +const test_id = crypto.getRandomValues(new Uint32Array(1))[0]; const username = `Example Name ${test_id}`; const email = `example${test_id}@example.com` const password = "Example password" From 6ae0cd14ffa5f7de4f666e907c9d72dee64d9847 Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 00:04:03 +0530 Subject: [PATCH 02/26] Fixes: https://cwe.mitre.org/data/definitions/20.html --- .../src/app/entity/shared/ExternalUrlButton.tsx | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx b/datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx index dce74c02cdb345..0a3ab3185cf5ab 100644 --- a/datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx +++ b/datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx @@ -1,10 +1,11 @@ import React from 'react'; import { EntityType } from '../../../types.generated'; import analytics, { EventType, EntityActionType } from '../../analytics'; +import { message } from 'antd'; import UrlButton from './UrlButton'; -const GITHUB_LINK = 'github.com'; -const GITHUB = 'GitHub'; +const GITHUB = 'github.com'; +const ALLOWED_GITHUB_HOSTS = ['github.com', 'www.github.com']; interface Props { externalUrl: string; @@ -24,8 +25,13 @@ export default function ExternalUrlButton({ externalUrl, platformName, entityTyp } let displayedName = platformName; - if (externalUrl.toLocaleLowerCase().includes(GITHUB_LINK)) { - displayedName = GITHUB; + try { + const host = new URL(externalUrl).host; + if (ALLOWED_GITHUB_HOSTS.includes(host.toLocaleLowerCase())) { + displayedName = GITHUB; + } + } catch(e: any) { + message.error({ content: `Not a valid URL! \n ${e?.message || ''}`, duration: 3 }); } return ( From c65b614249a6cce8c6fdf2bbda7e7fa1c70b4d6a Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 00:04:33 +0530 Subject: [PATCH 03/26] Fixes: https://cwe.mitre.org/data/definitions/20.html --- .../controllers/SsoCallbackController.java | 3 ++- .../auth/RevokeAccessTokenResolver.java | 2 +- .../ingest/secret/DeleteSecretResolver.java | 8 ++++---- .../resolvers/ingest/secret/SecretUtils.java | 8 ++++---- .../utils/translateFieldPathSegment.tsx | 2 +- .../models/extractor/FieldExtractor.java | 2 +- .../src/datahub/configuration/git.py | 12 ++++++++--- .../src/datahub/ingestion/source/mode.py | 2 +- .../systemmetadata/ESSystemMetadataDAO.java | 5 +---- .../metadata/services/SecretService.java | 8 ++++---- .../filter/AuthenticationFilter.java | 2 +- .../authentication/AuthServiceController.java | 20 ++++++------------- .../IngestDataPlatformInstancesStep.java | 2 +- .../datahub/graphql/GraphQLController.java | 2 +- .../registry/SchemaRegistryController.java | 6 +++++- .../metadata/service/RollbackService.java | 3 ++- 16 files changed, 44 insertions(+), 43 deletions(-) diff --git a/datahub-frontend/app/controllers/SsoCallbackController.java b/datahub-frontend/app/controllers/SsoCallbackController.java index 5e30bf976b8196..750886570bf406 100644 --- a/datahub-frontend/app/controllers/SsoCallbackController.java +++ b/datahub-frontend/app/controllers/SsoCallbackController.java @@ -66,7 +66,8 @@ public SsoCallbackController( public CompletionStage handleCallback(String protocol, Http.Request request) { if (shouldHandleCallback(protocol)) { - log.debug(String.format("Handling SSO callback. Protocol: %s", protocol)); + log.debug("Handling SSO callback. Protocol: {}", + _ssoManager.getSsoProvider().protocol().getCommonName()); return callback(request) .handle( (res, e) -> { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java index 79c8f4c15fe8cc..411a0d61e5aebf 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java @@ -40,7 +40,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw final QueryContext context = environment.getContext(); final String tokenId = bindArgument(environment.getArgument("tokenId"), String.class); - log.info("User {} revoking access token {}", context.getActorUrn(), tokenId); + log.info("User {} revoking access token", context.getActorUrn()); if (isAuthorizedToRevokeToken(context, tokenId)) { try { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java index 424c34a86c0c7b..1a869af5bb6678 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java @@ -22,16 +22,16 @@ public DeleteSecretResolver(final EntityClient entityClient) { public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { final QueryContext context = environment.getContext(); if (IngestionAuthUtils.canManageSecrets(context)) { - final String secretUrn = environment.getArgument("urn"); - final Urn urn = Urn.createFromString(secretUrn); + final String inputUrn = environment.getArgument("urn"); + final Urn urn = Urn.createFromString(inputUrn); return CompletableFuture.supplyAsync( () -> { try { _entityClient.deleteEntity(context.getOperationContext(), urn); - return secretUrn; + return inputUrn; } catch (Exception e) { throw new RuntimeException( - String.format("Failed to perform delete against secret with urn %s", secretUrn), + String.format("Failed to perform delete against secret with urn %s", inputUrn), e); } }); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/SecretUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/SecretUtils.java index 225a5801adec94..fa8a31dad89ef0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/SecretUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/SecretUtils.java @@ -18,14 +18,14 @@ static String encrypt(String value, String secret) { MessageDigest sha = null; try { key = secret.getBytes(StandardCharsets.UTF_8); - sha = MessageDigest.getInstance("SHA-1"); + sha = MessageDigest.getInstance("SHA-256"); key = sha.digest(key); key = Arrays.copyOf(key, 16); secretKey = new SecretKeySpec(key, "AES"); } catch (NoSuchAlgorithmException e) { e.printStackTrace(); } - Cipher cipher = Cipher.getInstance("AES/ECB/PKCS5Padding"); + Cipher cipher = Cipher.getInstance("AES"); cipher.init(Cipher.ENCRYPT_MODE, secretKey); return Base64.getEncoder() .encodeToString(cipher.doFinal(value.getBytes(StandardCharsets.UTF_8))); @@ -41,14 +41,14 @@ static String decrypt(String encryptedValue, String secret) { MessageDigest sha = null; try { key = secret.getBytes(StandardCharsets.UTF_8); - sha = MessageDigest.getInstance("SHA-1"); + sha = MessageDigest.getInstance("SHA-256"); key = sha.digest(key); key = Arrays.copyOf(key, 16); secretKey = new SecretKeySpec(key, "AES"); } catch (NoSuchAlgorithmException e) { e.printStackTrace(); } - Cipher cipher = Cipher.getInstance("AES/ECB/PKCS5PADDING"); + Cipher cipher = Cipher.getInstance("AES"); cipher.init(Cipher.DECRYPT_MODE, secretKey); return new String(cipher.doFinal(Base64.getDecoder().decode(encryptedValue))); } catch (Exception e) { diff --git a/datahub-web-react/src/app/entity/dataset/profile/schema/utils/translateFieldPathSegment.tsx b/datahub-web-react/src/app/entity/dataset/profile/schema/utils/translateFieldPathSegment.tsx index 7153a38a32c3b5..734580eb26f5dd 100644 --- a/datahub-web-react/src/app/entity/dataset/profile/schema/utils/translateFieldPathSegment.tsx +++ b/datahub-web-react/src/app/entity/dataset/profile/schema/utils/translateFieldPathSegment.tsx @@ -28,7 +28,7 @@ export default function translateFieldPathSegment(fieldPathSegment, i, fieldPath // structs that qualify a union are represented as [union]union_field.[type=QualifiedStruct].qualified_struct_field // we convert into union_field. (QualifiedStruct) qualified_struct_field if (fieldPathSegment.startsWith('[type=') && fieldPathSegment.endsWith(']')) { - const typeName = fieldPathSegment.replace('[type=', '').replace(']', ''); + const typeName = fieldPathSegment.replace(/\[type=/g, '').replace(/\]/g, ''); // if the qualified struct is the last element, just show the qualified struct if (i === fieldPathParts.length - 1) { return ` ${typeName}`; diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java b/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java index 899f66e66ea5ae..a1a38558c96fd5 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java @@ -63,7 +63,7 @@ public static Map> extractFields( } else { List valueList = (List) value.get(); // If the field is a nested list of values, flatten it - for (int i = 0; i < numArrayWildcards - 1; i++) { + for (int i = 0; i < (int) numArrayWildcards - 1; i++) { valueList = valueList.stream() .flatMap(v -> ((List) v).stream()) diff --git a/metadata-ingestion/src/datahub/configuration/git.py b/metadata-ingestion/src/datahub/configuration/git.py index 3c76c8da0d5717..5ba2b9d3a0158a 100644 --- a/metadata-ingestion/src/datahub/configuration/git.py +++ b/metadata-ingestion/src/datahub/configuration/git.py @@ -1,5 +1,6 @@ import pathlib from typing import Any, Dict, Optional, Union +from urllib.parse import urlparse from pydantic import Field, FilePath, SecretStr, validator @@ -39,9 +40,14 @@ class GitReference(ConfigModel): @validator("repo", pre=True) def simplify_repo_url(cls, repo: str) -> str: - if repo.startswith("github.com/"): - repo = f"https://{repo}" - elif repo.startswith("gitlab.com"): + repo_host = urlparse(repo).hostname + allowedHosts = [ + "github.com", + "www.github.com", + "gitlab.com", + "www.gitlab.com" + ] + if repo_host in allowedHosts: repo = f"https://{repo}" elif repo.count("/") == 1: repo = f"https://github.com/{repo}" diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py index 8dcdc5eeef4042..1fed07488ac0fc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -593,7 +593,7 @@ def _replace_definitions(self, raw_query: str) -> str: def _parse_definition_name(self, definition_variable: str) -> Tuple[str, str]: name, alias = "", "" # i.e '{{ @join_on_definition as alias}}' - name_match = re.findall("@[a-zA-z]+", definition_variable) + name_match = re.findall("@[a-zA-Z]+", definition_variable) if len(name_match): name = name_match[0][1:] alias_match = re.findall( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java index cf1674ac004809..a5c2fb04b5ce39 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java @@ -54,10 +54,7 @@ public Optional getTaskStatus(@Nonnull String nodeId, long task try { return client.tasks().get(taskRequest, RequestOptions.DEFAULT); } catch (IOException e) { - log.error( - String.format( - "ERROR: Failed to get task status for %s:%d. See stacktrace for a more detailed error:", - nodeId, taskId)); + log.error("ERROR: Failed to get task status: ", e); e.printStackTrace(); } return Optional.empty(); diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/services/SecretService.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/services/SecretService.java index bea03235abfb4c..9d8a09ddffbfa4 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/services/SecretService.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/services/SecretService.java @@ -43,14 +43,14 @@ public String encrypt(String value) { MessageDigest sha = null; try { key = _secret.getBytes(StandardCharsets.UTF_8); - sha = MessageDigest.getInstance("SHA-1"); + sha = MessageDigest.getInstance("SHA-256"); key = sha.digest(key); key = Arrays.copyOf(key, 16); secretKey = new SecretKeySpec(key, "AES"); } catch (NoSuchAlgorithmException e) { e.printStackTrace(); } - Cipher cipher = Cipher.getInstance("AES/ECB/PKCS5Padding"); + Cipher cipher = Cipher.getInstance("AES"); cipher.init(Cipher.ENCRYPT_MODE, secretKey); return _encoder.encodeToString(cipher.doFinal(value.getBytes(StandardCharsets.UTF_8))); } catch (Exception e) { @@ -65,14 +65,14 @@ public String decrypt(String encryptedValue) { MessageDigest sha = null; try { key = _secret.getBytes(StandardCharsets.UTF_8); - sha = MessageDigest.getInstance("SHA-1"); + sha = MessageDigest.getInstance("SHA-256"); key = sha.digest(key); key = Arrays.copyOf(key, 16); secretKey = new SecretKeySpec(key, "AES"); } catch (NoSuchAlgorithmException e) { e.printStackTrace(); } - Cipher cipher = Cipher.getInstance("AES/ECB/PKCS5PADDING"); + Cipher cipher = Cipher.getInstance("AES"); cipher.init(Cipher.DECRYPT_MODE, secretKey); return new String(cipher.doFinal(_decoder.decode(encryptedValue))); } catch (Exception e) { diff --git a/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java b/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java index dd68e4d36f14e4..ee2efd2ae95365 100644 --- a/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java +++ b/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java @@ -92,7 +92,7 @@ public void doFilter(ServletRequest request, ServletResponse response, FilterCha "Failed to authenticate request. Received an AuthenticationExpiredException from authenticator chain.", e); ((HttpServletResponse) response) - .sendError(HttpServletResponse.SC_UNAUTHORIZED, e.getMessage()); + .sendError(HttpServletResponse.SC_UNAUTHORIZED, "Unauthorized to perform this action."); return; } diff --git a/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java b/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java index bb6aa1ed231d78..c779cc37f4d9dc 100644 --- a/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java +++ b/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java @@ -123,9 +123,7 @@ CompletableFuture> generateSessionTokenForUser( try { bodyJson = mapper.readTree(jsonStr); } catch (JsonProcessingException e) { - log.error( - String.format( - "Failed to parse json while attempting to generate session token %s", jsonStr)); + log.error("Failed to parse json while attempting to generate session token ", e); return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } if (bodyJson == null) { @@ -189,8 +187,7 @@ CompletableFuture> signUp(final HttpEntity httpEn try { bodyJson = mapper.readTree(jsonStr); } catch (JsonProcessingException e) { - log.error( - String.format("Failed to parse json while attempting to create native user %s", jsonStr)); + log.error("Failed to parse json while attempting to create native user ", e); return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } if (bodyJson == null) { @@ -235,7 +232,7 @@ CompletableFuture> signUp(final HttpEntity httpEn try { Urn inviteTokenUrn = _inviteTokenService.getInviteTokenUrn(inviteTokenString); if (!_inviteTokenService.isInviteTokenValid(systemOperationContext, inviteTokenUrn)) { - log.error(String.format("Invalid invite token %s", inviteTokenString)); + log.error("Invalid invite token !"); return new ResponseEntity<>(HttpStatus.BAD_REQUEST); } @@ -279,8 +276,7 @@ CompletableFuture> resetNativeUserCredentials( try { bodyJson = mapper.readTree(jsonStr); } catch (JsonProcessingException e) { - log.error( - String.format("Failed to parse json while attempting to create native user %s", jsonStr)); + log.error("Failed to parse json while attempting to create native user ", e); return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } if (bodyJson == null) { @@ -338,9 +334,7 @@ CompletableFuture> verifyNativeUserCredentials( try { bodyJson = mapper.readTree(jsonStr); } catch (JsonProcessingException e) { - log.error( - String.format( - "Failed to parse json while attempting to verify native user password %s", jsonStr)); + log.error("Failed to parse json while attempting to verify native user password ", e); return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } if (bodyJson == null) { @@ -383,9 +377,7 @@ CompletableFuture> track(final HttpEntity httpEnt try { bodyJson = mapper.readTree(jsonStr); } catch (JsonProcessingException e) { - log.error( - String.format( - "Failed to parse json while attempting to track analytics event %s", jsonStr)); + log.error("Failed to parse json while attempting to track analytics event ", e); return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } if (bodyJson == null) { diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java index f242106e3fe4b4..df5cd588c9b951 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java @@ -61,7 +61,7 @@ public void execute(@Nonnull OperationContext systemOperationContext) throws Exc long numEntities = _migrationsDao.countEntities(); int start = 0; - while (start < numEntities) { + while (start < (int) numEntities) { log.info( "Reading urns {} to {} from the aspects table to generate dataplatform instance aspects", start, diff --git a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java index 31e4b58a56e74e..ebe64a59981a36 100644 --- a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java +++ b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java @@ -70,7 +70,7 @@ CompletableFuture> postGraphQL(HttpEntity httpEnt try { bodyJson = mapper.readTree(jsonStr); } catch (JsonProcessingException e) { - log.error(String.format("Failed to parse json %s", jsonStr)); + log.error("Failed to parse json ", e); return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } diff --git a/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java b/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java index b9b4dd1ec01b1e..e4aa92daa55a60 100644 --- a/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java +++ b/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java @@ -299,7 +299,11 @@ public ResponseEntity register( }) .orElseGet( () -> { - log.error("Couldn't find topic with name {}.", topicName); + if (topicName.matches("^[a-zA-Z0-9._-]+$")) { + log.error("Couldn't find topic with name {}.", topicName); + } else { + log.error("Couldn't find topic (Malformed topic name)"); + } return new ResponseEntity<>(HttpStatus.NOT_FOUND); }); } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/service/RollbackService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/RollbackService.java index 01af399c9b1155..1b255a253f59ea 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/service/RollbackService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/service/RollbackService.java @@ -173,7 +173,8 @@ public RollbackResponse rollbackIngestion( // Rollback timeseries aspects DeleteAspectValuesResult timeseriesRollbackResult = timeseriesAspectService.rollbackTimeseriesAspects(opContext, runId); - rowsDeletedFromEntityDeletion += timeseriesRollbackResult.getNumDocsDeleted(); + rowsDeletedFromEntityDeletion += + timeseriesRollbackResult.getNumDocsDeleted().intValue(); log.info("finished deleting {} rows", deletedRows.size()); int aspectsReverted = deletedRows.size() + rowsDeletedFromEntityDeletion; From 9e57051a8be596496c5b87deb9e2e2f4ed965e17 Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 00:12:39 +0530 Subject: [PATCH 04/26] logging user input --- datahub-frontend/app/controllers/SsoCallbackController.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datahub-frontend/app/controllers/SsoCallbackController.java b/datahub-frontend/app/controllers/SsoCallbackController.java index 750886570bf406..5e30bf976b8196 100644 --- a/datahub-frontend/app/controllers/SsoCallbackController.java +++ b/datahub-frontend/app/controllers/SsoCallbackController.java @@ -66,8 +66,7 @@ public SsoCallbackController( public CompletionStage handleCallback(String protocol, Http.Request request) { if (shouldHandleCallback(protocol)) { - log.debug("Handling SSO callback. Protocol: {}", - _ssoManager.getSsoProvider().protocol().getCommonName()); + log.debug(String.format("Handling SSO callback. Protocol: %s", protocol)); return callback(request) .handle( (res, e) -> { From c480f4b0afc6533b13666a76a16a1feae4067def Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 00:13:14 +0530 Subject: [PATCH 05/26] logging token --- .../graphql/resolvers/auth/RevokeAccessTokenResolver.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java index 411a0d61e5aebf..79c8f4c15fe8cc 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java @@ -40,7 +40,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw final QueryContext context = environment.getContext(); final String tokenId = bindArgument(environment.getArgument("tokenId"), String.class); - log.info("User {} revoking access token", context.getActorUrn()); + log.info("User {} revoking access token {}", context.getActorUrn(), tokenId); if (isAuthorizedToRevokeToken(context, tokenId)) { try { From 3836520f08a88060f5e7c5858935c9e6d8fdb322 Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 00:14:30 +0530 Subject: [PATCH 06/26] codeQL error secrets cannot be logged --- .../resolvers/ingest/secret/DeleteSecretResolver.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java index 1a869af5bb6678..424c34a86c0c7b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java @@ -22,16 +22,16 @@ public DeleteSecretResolver(final EntityClient entityClient) { public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { final QueryContext context = environment.getContext(); if (IngestionAuthUtils.canManageSecrets(context)) { - final String inputUrn = environment.getArgument("urn"); - final Urn urn = Urn.createFromString(inputUrn); + final String secretUrn = environment.getArgument("urn"); + final Urn urn = Urn.createFromString(secretUrn); return CompletableFuture.supplyAsync( () -> { try { _entityClient.deleteEntity(context.getOperationContext(), urn); - return inputUrn; + return secretUrn; } catch (Exception e) { throw new RuntimeException( - String.format("Failed to perform delete against secret with urn %s", inputUrn), + String.format("Failed to perform delete against secret with urn %s", secretUrn), e); } }); From 4ec61830fc786faf8c73742d20dde426fd56b07d Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 00:15:27 +0530 Subject: [PATCH 07/26] Fixes: https://cwe.mitre.org/data/definitions/116.html --- .../dataset/profile/schema/utils/translateFieldPathSegment.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datahub-web-react/src/app/entity/dataset/profile/schema/utils/translateFieldPathSegment.tsx b/datahub-web-react/src/app/entity/dataset/profile/schema/utils/translateFieldPathSegment.tsx index 734580eb26f5dd..7153a38a32c3b5 100644 --- a/datahub-web-react/src/app/entity/dataset/profile/schema/utils/translateFieldPathSegment.tsx +++ b/datahub-web-react/src/app/entity/dataset/profile/schema/utils/translateFieldPathSegment.tsx @@ -28,7 +28,7 @@ export default function translateFieldPathSegment(fieldPathSegment, i, fieldPath // structs that qualify a union are represented as [union]union_field.[type=QualifiedStruct].qualified_struct_field // we convert into union_field. (QualifiedStruct) qualified_struct_field if (fieldPathSegment.startsWith('[type=') && fieldPathSegment.endsWith(']')) { - const typeName = fieldPathSegment.replace(/\[type=/g, '').replace(/\]/g, ''); + const typeName = fieldPathSegment.replace('[type=', '').replace(']', ''); // if the qualified struct is the last element, just show the qualified struct if (i === fieldPathParts.length - 1) { return ` ${typeName}`; From 0d33396843c0b5c953f5b0f7040355202a6ddc51 Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 00:17:18 +0530 Subject: [PATCH 08/26] use inputUrn --- .../resolvers/ingest/secret/DeleteSecretResolver.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java index 424c34a86c0c7b..1a869af5bb6678 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java @@ -22,16 +22,16 @@ public DeleteSecretResolver(final EntityClient entityClient) { public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { final QueryContext context = environment.getContext(); if (IngestionAuthUtils.canManageSecrets(context)) { - final String secretUrn = environment.getArgument("urn"); - final Urn urn = Urn.createFromString(secretUrn); + final String inputUrn = environment.getArgument("urn"); + final Urn urn = Urn.createFromString(inputUrn); return CompletableFuture.supplyAsync( () -> { try { _entityClient.deleteEntity(context.getOperationContext(), urn); - return secretUrn; + return inputUrn; } catch (Exception e) { throw new RuntimeException( - String.format("Failed to perform delete against secret with urn %s", secretUrn), + String.format("Failed to perform delete against secret with urn %s", inputUrn), e); } }); From 08559ded9a1da986b3883ff778ce10776c60f0b7 Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 00:17:58 +0530 Subject: [PATCH 09/26] not logging token --- .../graphql/resolvers/auth/RevokeAccessTokenResolver.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java index 79c8f4c15fe8cc..411a0d61e5aebf 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java @@ -40,7 +40,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw final QueryContext context = environment.getContext(); final String tokenId = bindArgument(environment.getArgument("tokenId"), String.class); - log.info("User {} revoking access token {}", context.getActorUrn(), tokenId); + log.info("User {} revoking access token", context.getActorUrn()); if (isAuthorizedToRevokeToken(context, tokenId)) { try { From 8dd36281de62e14cac1ff89da53d99383039ba46 Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 00:18:39 +0530 Subject: [PATCH 10/26] Revert change --- .../dataset/profile/schema/utils/translateFieldPathSegment.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datahub-web-react/src/app/entity/dataset/profile/schema/utils/translateFieldPathSegment.tsx b/datahub-web-react/src/app/entity/dataset/profile/schema/utils/translateFieldPathSegment.tsx index 7153a38a32c3b5..734580eb26f5dd 100644 --- a/datahub-web-react/src/app/entity/dataset/profile/schema/utils/translateFieldPathSegment.tsx +++ b/datahub-web-react/src/app/entity/dataset/profile/schema/utils/translateFieldPathSegment.tsx @@ -28,7 +28,7 @@ export default function translateFieldPathSegment(fieldPathSegment, i, fieldPath // structs that qualify a union are represented as [union]union_field.[type=QualifiedStruct].qualified_struct_field // we convert into union_field. (QualifiedStruct) qualified_struct_field if (fieldPathSegment.startsWith('[type=') && fieldPathSegment.endsWith(']')) { - const typeName = fieldPathSegment.replace('[type=', '').replace(']', ''); + const typeName = fieldPathSegment.replace(/\[type=/g, '').replace(/\]/g, ''); // if the qualified struct is the last element, just show the qualified struct if (i === fieldPathParts.length - 1) { return ` ${typeName}`; From ae10f851f4044d0d5ea7ca0a2e4ebf2385295dce Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 00:19:47 +0530 Subject: [PATCH 11/26] do not log user input --- datahub-frontend/app/controllers/SsoCallbackController.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datahub-frontend/app/controllers/SsoCallbackController.java b/datahub-frontend/app/controllers/SsoCallbackController.java index 5e30bf976b8196..750886570bf406 100644 --- a/datahub-frontend/app/controllers/SsoCallbackController.java +++ b/datahub-frontend/app/controllers/SsoCallbackController.java @@ -66,7 +66,8 @@ public SsoCallbackController( public CompletionStage handleCallback(String protocol, Http.Request request) { if (shouldHandleCallback(protocol)) { - log.debug(String.format("Handling SSO callback. Protocol: %s", protocol)); + log.debug("Handling SSO callback. Protocol: {}", + _ssoManager.getSsoProvider().protocol().getCommonName()); return callback(request) .handle( (res, e) -> { From f36854c7baf015a92475c9977c654cc7aa04e70e Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 00:20:52 +0530 Subject: [PATCH 12/26] Revert sha --- .../datahub/graphql/resolvers/ingest/secret/SecretUtils.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/SecretUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/SecretUtils.java index fa8a31dad89ef0..87a3e5cb79ebfc 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/SecretUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/SecretUtils.java @@ -18,7 +18,7 @@ static String encrypt(String value, String secret) { MessageDigest sha = null; try { key = secret.getBytes(StandardCharsets.UTF_8); - sha = MessageDigest.getInstance("SHA-256"); + sha = MessageDigest.getInstance("SHA-1"); key = sha.digest(key); key = Arrays.copyOf(key, 16); secretKey = new SecretKeySpec(key, "AES"); @@ -41,7 +41,7 @@ static String decrypt(String encryptedValue, String secret) { MessageDigest sha = null; try { key = secret.getBytes(StandardCharsets.UTF_8); - sha = MessageDigest.getInstance("SHA-256"); + sha = MessageDigest.getInstance("SHA-1"); key = sha.digest(key); key = Arrays.copyOf(key, 16); secretKey = new SecretKeySpec(key, "AES"); From 52f6911f9407f703d947a53092160c4a7d7be153 Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 00:22:52 +0530 Subject: [PATCH 13/26] revert sha --- .../io/datahubproject/metadata/services/SecretService.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/services/SecretService.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/services/SecretService.java index 9d8a09ddffbfa4..48e7f80173cfde 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/services/SecretService.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/services/SecretService.java @@ -43,7 +43,7 @@ public String encrypt(String value) { MessageDigest sha = null; try { key = _secret.getBytes(StandardCharsets.UTF_8); - sha = MessageDigest.getInstance("SHA-256"); + sha = MessageDigest.getInstance("SHA-1"); key = sha.digest(key); key = Arrays.copyOf(key, 16); secretKey = new SecretKeySpec(key, "AES"); @@ -65,7 +65,7 @@ public String decrypt(String encryptedValue) { MessageDigest sha = null; try { key = _secret.getBytes(StandardCharsets.UTF_8); - sha = MessageDigest.getInstance("SHA-256"); + sha = MessageDigest.getInstance("SHA-1"); key = sha.digest(key); key = Arrays.copyOf(key, 16); secretKey = new SecretKeySpec(key, "AES"); From 2ff8427a1c37bfec982808dba91124db5dae5b72 Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 00:34:18 +0530 Subject: [PATCH 14/26] lint fix --- metadata-ingestion/src/datahub/configuration/git.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/metadata-ingestion/src/datahub/configuration/git.py b/metadata-ingestion/src/datahub/configuration/git.py index 5ba2b9d3a0158a..b0537adbbb18e3 100644 --- a/metadata-ingestion/src/datahub/configuration/git.py +++ b/metadata-ingestion/src/datahub/configuration/git.py @@ -41,12 +41,7 @@ class GitReference(ConfigModel): @validator("repo", pre=True) def simplify_repo_url(cls, repo: str) -> str: repo_host = urlparse(repo).hostname - allowedHosts = [ - "github.com", - "www.github.com", - "gitlab.com", - "www.gitlab.com" - ] + allowedHosts = ["github.com", "www.github.com", "gitlab.com", "www.gitlab.com"] if repo_host in allowedHosts: repo = f"https://{repo}" elif repo.count("/") == 1: From 03be73e4d135eb1f01e1ec8be752102fe509455f Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 00:35:48 +0530 Subject: [PATCH 15/26] Linting... --- datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx b/datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx index 0a3ab3185cf5ab..bb3cc3169c0cdb 100644 --- a/datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx +++ b/datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx @@ -1,7 +1,7 @@ import React from 'react'; +import { message } from 'antd'; import { EntityType } from '../../../types.generated'; import analytics, { EventType, EntityActionType } from '../../analytics'; -import { message } from 'antd'; import UrlButton from './UrlButton'; const GITHUB = 'github.com'; From 5f55071972da7f3ef82634d0a92eba12f86191aa Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 00:46:04 +0530 Subject: [PATCH 16/26] destructure object --- datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx b/datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx index bb3cc3169c0cdb..f705d7e206715e 100644 --- a/datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx +++ b/datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx @@ -26,7 +26,7 @@ export default function ExternalUrlButton({ externalUrl, platformName, entityTyp let displayedName = platformName; try { - const host = new URL(externalUrl).host; + const { host } = new URL(externalUrl); if (ALLOWED_GITHUB_HOSTS.includes(host.toLocaleLowerCase())) { displayedName = GITHUB; } From e6c2b572417fc5236dde809559c553a1fff6ec3f Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 01:05:18 +0530 Subject: [PATCH 17/26] linting fix --- metadata-ingestion/src/datahub/configuration/git.py | 7 +++---- .../com/linkedin/metadata/service/RollbackService.java | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/metadata-ingestion/src/datahub/configuration/git.py b/metadata-ingestion/src/datahub/configuration/git.py index b0537adbbb18e3..3c76c8da0d5717 100644 --- a/metadata-ingestion/src/datahub/configuration/git.py +++ b/metadata-ingestion/src/datahub/configuration/git.py @@ -1,6 +1,5 @@ import pathlib from typing import Any, Dict, Optional, Union -from urllib.parse import urlparse from pydantic import Field, FilePath, SecretStr, validator @@ -40,9 +39,9 @@ class GitReference(ConfigModel): @validator("repo", pre=True) def simplify_repo_url(cls, repo: str) -> str: - repo_host = urlparse(repo).hostname - allowedHosts = ["github.com", "www.github.com", "gitlab.com", "www.gitlab.com"] - if repo_host in allowedHosts: + if repo.startswith("github.com/"): + repo = f"https://{repo}" + elif repo.startswith("gitlab.com"): repo = f"https://{repo}" elif repo.count("/") == 1: repo = f"https://github.com/{repo}" diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/service/RollbackService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/RollbackService.java index 1b255a253f59ea..403665120c6868 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/service/RollbackService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/service/RollbackService.java @@ -173,8 +173,7 @@ public RollbackResponse rollbackIngestion( // Rollback timeseries aspects DeleteAspectValuesResult timeseriesRollbackResult = timeseriesAspectService.rollbackTimeseriesAspects(opContext, runId); - rowsDeletedFromEntityDeletion += - timeseriesRollbackResult.getNumDocsDeleted().intValue(); + rowsDeletedFromEntityDeletion += timeseriesRollbackResult.getNumDocsDeleted().intValue(); log.info("finished deleting {} rows", deletedRows.size()); int aspectsReverted = deletedRows.size() + rowsDeletedFromEntityDeletion; From 64cc678270f0a6d811fda44bc4e843a6956f768c Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 01:25:06 +0530 Subject: [PATCH 18/26] Update git.py --- metadata-ingestion/src/datahub/configuration/git.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/configuration/git.py b/metadata-ingestion/src/datahub/configuration/git.py index 3c76c8da0d5717..fb417eec690a4d 100644 --- a/metadata-ingestion/src/datahub/configuration/git.py +++ b/metadata-ingestion/src/datahub/configuration/git.py @@ -1,5 +1,6 @@ import pathlib from typing import Any, Dict, Optional, Union +from urllib.parse import urlparse from pydantic import Field, FilePath, SecretStr, validator @@ -39,10 +40,13 @@ class GitReference(ConfigModel): @validator("repo", pre=True) def simplify_repo_url(cls, repo: str) -> str: - if repo.startswith("github.com/"): - repo = f"https://{repo}" - elif repo.startswith("gitlab.com"): - repo = f"https://{repo}" + repo_host = urlparse(repo).hostname + allowedHosts = ["github.com", "www.github.com", "gitlab.com", "www.gitlab.com"] + index = allowedHosts.index(repo_host) + if index == 0 or index == 1: + repo = _GITHUB_PREFIX + elif index == 2 or index == 3: + repo = _GITLAB_PREFIX elif repo.count("/") == 1: repo = f"https://github.com/{repo}" From c4436526490e9a4fbdf8aad7e8e714799caaa220 Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 01:31:54 +0530 Subject: [PATCH 19/26] Update logic --- metadata-ingestion/src/datahub/configuration/git.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/configuration/git.py b/metadata-ingestion/src/datahub/configuration/git.py index fb417eec690a4d..ed42bd200e5aa1 100644 --- a/metadata-ingestion/src/datahub/configuration/git.py +++ b/metadata-ingestion/src/datahub/configuration/git.py @@ -42,11 +42,8 @@ class GitReference(ConfigModel): def simplify_repo_url(cls, repo: str) -> str: repo_host = urlparse(repo).hostname allowedHosts = ["github.com", "www.github.com", "gitlab.com", "www.gitlab.com"] - index = allowedHosts.index(repo_host) - if index == 0 or index == 1: - repo = _GITHUB_PREFIX - elif index == 2 or index == 3: - repo = _GITLAB_PREFIX + if repo_host in allowedHosts and (repo.startswith("github.com/") or repo.startswith("gitlab.com")): + return f"https://{repo}" elif repo.count("/") == 1: repo = f"https://github.com/{repo}" From e563b584edf3bb76c9604d1a80a19f5182e820dd Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 01:43:13 +0530 Subject: [PATCH 20/26] lint --- metadata-ingestion/src/datahub/configuration/git.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/configuration/git.py b/metadata-ingestion/src/datahub/configuration/git.py index ed42bd200e5aa1..84422f275cd207 100644 --- a/metadata-ingestion/src/datahub/configuration/git.py +++ b/metadata-ingestion/src/datahub/configuration/git.py @@ -42,7 +42,9 @@ class GitReference(ConfigModel): def simplify_repo_url(cls, repo: str) -> str: repo_host = urlparse(repo).hostname allowedHosts = ["github.com", "www.github.com", "gitlab.com", "www.gitlab.com"] - if repo_host in allowedHosts and (repo.startswith("github.com/") or repo.startswith("gitlab.com")): + if repo_host in allowedHosts and ( + repo.startswith("github.com/") or repo.startswith("gitlab.com") + ): return f"https://{repo}" elif repo.count("/") == 1: repo = f"https://github.com/{repo}" From 2816404739eddd63425530f5b3913825d3cf1910 Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 12:17:18 +0530 Subject: [PATCH 21/26] Update allowed hosts logic --- metadata-ingestion/src/datahub/configuration/git.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/configuration/git.py b/metadata-ingestion/src/datahub/configuration/git.py index 84422f275cd207..101d4ae6c478f7 100644 --- a/metadata-ingestion/src/datahub/configuration/git.py +++ b/metadata-ingestion/src/datahub/configuration/git.py @@ -40,11 +40,9 @@ class GitReference(ConfigModel): @validator("repo", pre=True) def simplify_repo_url(cls, repo: str) -> str: - repo_host = urlparse(repo).hostname - allowedHosts = ["github.com", "www.github.com", "gitlab.com", "www.gitlab.com"] - if repo_host in allowedHosts and ( - repo.startswith("github.com/") or repo.startswith("gitlab.com") - ): + repo_host = urlparse(repo).hostname.lstrip("www.") + allowedHosts = ["github.com", "gitlab.com"] + if repo_host in allowedHosts: return f"https://{repo}" elif repo.count("/") == 1: repo = f"https://github.com/{repo}" From a4d208d7d12c5af6414b275ed63ed56f532b9bfa Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 12:24:34 +0530 Subject: [PATCH 22/26] Using regex --- metadata-ingestion/src/datahub/configuration/git.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/configuration/git.py b/metadata-ingestion/src/datahub/configuration/git.py index 101d4ae6c478f7..fde7c6604204ad 100644 --- a/metadata-ingestion/src/datahub/configuration/git.py +++ b/metadata-ingestion/src/datahub/configuration/git.py @@ -1,4 +1,5 @@ import pathlib +import re from typing import Any, Dict, Optional, Union from urllib.parse import urlparse @@ -40,7 +41,8 @@ class GitReference(ConfigModel): @validator("repo", pre=True) def simplify_repo_url(cls, repo: str) -> str: - repo_host = urlparse(repo).hostname.lstrip("www.") + repo_host = urlparse(repo).hostname + repo_host = re.sub(r"^www\.", "", repo_host) allowedHosts = ["github.com", "gitlab.com"] if repo_host in allowedHosts: return f"https://{repo}" From a2e46ea2973992f9adcf0449f33e6d491ed70c5e Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 12:32:21 +0530 Subject: [PATCH 23/26] type fix --- metadata-ingestion/src/datahub/configuration/git.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/configuration/git.py b/metadata-ingestion/src/datahub/configuration/git.py index fde7c6604204ad..2183ccf0be4a7e 100644 --- a/metadata-ingestion/src/datahub/configuration/git.py +++ b/metadata-ingestion/src/datahub/configuration/git.py @@ -42,7 +42,7 @@ class GitReference(ConfigModel): @validator("repo", pre=True) def simplify_repo_url(cls, repo: str) -> str: repo_host = urlparse(repo).hostname - repo_host = re.sub(r"^www\.", "", repo_host) + repo_host = re.sub(r"^www\.", "", str(repo_host)) allowedHosts = ["github.com", "gitlab.com"] if repo_host in allowedHosts: return f"https://{repo}" From a8aaf389369092d486dc11993ee54c7468139dba Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Fri, 19 Apr 2024 12:54:13 +0530 Subject: [PATCH 24/26] Revert changes for git --- metadata-ingestion/src/datahub/configuration/git.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/configuration/git.py b/metadata-ingestion/src/datahub/configuration/git.py index 2183ccf0be4a7e..84422f275cd207 100644 --- a/metadata-ingestion/src/datahub/configuration/git.py +++ b/metadata-ingestion/src/datahub/configuration/git.py @@ -1,5 +1,4 @@ import pathlib -import re from typing import Any, Dict, Optional, Union from urllib.parse import urlparse @@ -42,9 +41,10 @@ class GitReference(ConfigModel): @validator("repo", pre=True) def simplify_repo_url(cls, repo: str) -> str: repo_host = urlparse(repo).hostname - repo_host = re.sub(r"^www\.", "", str(repo_host)) - allowedHosts = ["github.com", "gitlab.com"] - if repo_host in allowedHosts: + allowedHosts = ["github.com", "www.github.com", "gitlab.com", "www.gitlab.com"] + if repo_host in allowedHosts and ( + repo.startswith("github.com/") or repo.startswith("gitlab.com") + ): return f"https://{repo}" elif repo.count("/") == 1: repo = f"https://github.com/{repo}" From e61569c1182c67a07be2976ddcb9e4ad3456a552 Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Tue, 9 Jul 2024 20:29:53 +0530 Subject: [PATCH 25/26] Update ExternalUrlButton.tsx --- datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx b/datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx index 876b595f54ad47..d821cbfc01355e 100644 --- a/datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx +++ b/datahub-web-react/src/app/entity/shared/ExternalUrlButton.tsx @@ -1,5 +1,4 @@ import React from 'react'; -import { message } from 'antd'; import { EntityType } from '../../../types.generated'; import analytics, { EventType, EntityActionType } from '../../analytics'; import UrlButton from './UrlButton'; From a92d1fa5a8b57e9275c03c996130d956e9c31fc8 Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Tue, 9 Jul 2024 21:04:22 +0530 Subject: [PATCH 26/26] Update FieldExtractor.java --- .../linkedin/metadata/models/extractor/FieldExtractor.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java b/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java index a1a38558c96fd5..656805891d9dc1 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java @@ -42,7 +42,7 @@ public static Map> extractFields( if (!value.isPresent()) { extractedFields.put(fieldSpec, Collections.emptyList()); } else { - long numArrayWildcards = getNumArrayWildcards(fieldSpec.getPath()); + int numArrayWildcards = (int) getNumArrayWildcards(fieldSpec.getPath()); // Not an array field if (numArrayWildcards == 0) { // For maps, convert it into a list of the form key=value (Filter out long values) @@ -63,7 +63,7 @@ public static Map> extractFields( } else { List valueList = (List) value.get(); // If the field is a nested list of values, flatten it - for (int i = 0; i < (int) numArrayWildcards - 1; i++) { + for (int i = 0; i < numArrayWildcards - 1; i++) { valueList = valueList.stream() .flatMap(v -> ((List) v).stream())