Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jira and Confluence connector features #2682

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions backend/danswer/connectors/confluence/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,8 @@ def __init__(
continue_on_failure: bool = CONTINUE_ON_CONNECTOR_FAILURE,
# if a page has one of the labels specified in this list, we will just
# skip it. This is generally used to avoid indexing extra sensitive
# pages.
labels_to_skip: list[str] = CONFLUENCE_CONNECTOR_LABELS_TO_SKIP,
# pages. Defaults to CONFLUENCE_CONNECTOR_LABELS_TO_SKIP for backward compatibility.
labels_to_skip: list[str] = None or CONFLUENCE_CONNECTOR_LABELS_TO_SKIP,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

style: Consider using a default argument that doesn't rely on short-circuit evaluation

) -> None:
self.batch_size = batch_size
self.continue_on_failure = continue_on_failure
Expand Down Expand Up @@ -488,6 +488,7 @@ def _attachment_to_content(
cls,
confluence_client: Confluence,
attachment: dict[str, Any],
labels_to_skip: set[str],
) -> str | None:
"""If it returns None, assume that we should skip this attachment."""
if attachment["metadata"]["mediaType"] in [
Expand Down Expand Up @@ -518,6 +519,14 @@ def _attachment_to_content(
)
return None

if labels_to_skip:
attachment_labels = [
label["name"] for label in attachment["metadata"]["labels"]["results"]
]
label_intersection = labels_to_skip.intersection(attachment_labels)
if label_intersection and attachment_labels:
return None
Comment on lines +522 to +528
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

style: This block could be moved to a separate method for better readability


extracted_text = extract_file_text(
io.BytesIO(response.content),
file_name=attachment["title"],
Expand Down Expand Up @@ -554,7 +563,7 @@ def _fetch_attachments(
continue

attachment_content = self._attachment_to_content(
confluence_client, attachment
confluence_client, attachment, self.labels_to_skip
)
if attachment_content:
files_attachment_content.append(attachment_content)
Expand Down Expand Up @@ -679,7 +688,7 @@ def _get_attachment_batch(
self.wiki_base, attachment["_links"]["download"]
)
attachment_content = self._attachment_to_content(
self.confluence_client, attachment
self.confluence_client, attachment, self.labels_to_skip
)
if attachment_content is None:
continue
Expand Down
34 changes: 31 additions & 3 deletions backend/danswer/connectors/danswer_jira/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from danswer.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from danswer.connectors.danswer_jira.utils import CustomFieldExtractor
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
Expand Down Expand Up @@ -106,6 +107,7 @@ def fetch_jira_issues_batch(
batch_size: int = INDEX_BATCH_SIZE,
comment_email_blacklist: tuple[str, ...] = (),
labels_to_skip: set[str] | None = None,
custom_fields: dict | None = None,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

style: Consider adding type hints for the custom_fields parameter

) -> tuple[list[Document], int]:
doc_batch = []

Expand Down Expand Up @@ -186,6 +188,13 @@ def fetch_jira_issues_batch(
if labels:
metadata_dict["label"] = labels

# add custom fields
if custom_fields:
issue_custom_fields = CustomFieldExtractor.get_issue_custom_fields(
jira, custom_fields
)
metadata_dict = {**metadata_dict, **issue_custom_fields}

doc_batch.append(
Document(
id=page_url,
Expand All @@ -209,20 +218,37 @@ def __init__(
batch_size: int = INDEX_BATCH_SIZE,
# if a ticket has one of the labels specified in this list, we will just
# skip it. This is generally used to avoid indexing extra sensitive
# tickets.
labels_to_skip: list[str] = JIRA_CONNECTOR_LABELS_TO_SKIP,
# tickets. If no labels to skip are provided in the UI, fall back to the
# value from .env; this default is kept for backward compatibility.
labels_to_skip: list[str] = None or JIRA_CONNECTOR_LABELS_TO_SKIP,
include_custom_fields: bool = False,
) -> None:
self.batch_size = batch_size
self.jira_base, self.jira_project = extract_jira_project(jira_project_url)
self.jira_client: JIRA | None = None
self._comment_email_blacklist = comment_email_blacklist or []

self.labels_to_skip = set(labels_to_skip)
self.include_custom_fields = include_custom_fields

@property
def comment_email_blacklist(self) -> tuple:
    """Return the blacklist entries as a tuple, each stripped of surrounding whitespace.

    Reads ``self._comment_email_blacklist`` and leaves it unmodified.
    """
    stripped_entries = [entry.strip() for entry in self._comment_email_blacklist]
    return tuple(stripped_entries)

@property
def custom_fields(self) -> dict | None:
if self.include_custom_fields:
try:
custom_fields = CustomFieldExtractor.get_all_custom_fields(
self.jira_client
)
except Exception as e:
logger.warning(
"Cannot get custom fields from jira because of error:" f"{e}"
)
else:
return custom_fields
return None

def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
api_token = credentials["jira_api_token"]
# if user provide an email we assume it's cloud
Expand Down Expand Up @@ -256,6 +282,7 @@ def load_from_state(self) -> GenerateDocumentsOutput:
batch_size=self.batch_size,
comment_email_blacklist=self.comment_email_blacklist,
labels_to_skip=self.labels_to_skip,
custom_fields=self.custom_fields,
)

if doc_batch:
Expand Down Expand Up @@ -295,6 +322,7 @@ def poll_source(
batch_size=self.batch_size,
comment_email_blacklist=self.comment_email_blacklist,
labels_to_skip=self.labels_to_skip,
custom_fields=self.custom_fields,
)

if doc_batch:
Expand Down
31 changes: 31 additions & 0 deletions web/src/lib/connectors/connectors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,15 @@ Selecting the "Index Recursively" checkbox will index the specified page and all
description:
"Check if this is a Confluence Cloud instance, uncheck for Confluence Server/Data Center",
},
{
type: "list",
query: "Labels to skip",
label: "Labels to skip",
name: "labels_to_skip",
optional: true,
description:
"Add labels for pages that should be skipped during indexing",
},
],
},
jira: {
Expand All @@ -298,6 +307,16 @@ Selecting the "Index Recursively" checkbox will index the specified page and all
name: "jira_project_url",
optional: false,
},
{
type: "checkbox",
query: "Include custom fields in documents",
label: "Include custom fields",
name: "include_custom_fields",
description:
"Useful if there is important information in jira custom fields.",
default: false,
optional: true,
},
{
type: "list",
query: "Enter email addresses to blacklist from comments:",
Expand All @@ -307,6 +326,15 @@ Selecting the "Index Recursively" checkbox will index the specified page and all
"This is generally useful to ignore certain bots. Add user emails which comments should NOT be indexed.",
optional: true,
},
{
type: "list",
query: "Labels to skip",
label: "Labels to skip",
name: "labels_to_skip",
optional: true,
description:
"Add labels for pages that should be skipped during indexing.",
},
],
},
salesforce: {
Expand Down Expand Up @@ -991,11 +1019,14 @@ export interface ConfluenceConfig {
page_id?: string;
is_cloud?: boolean;
index_recursively?: boolean;
labels_to_skip?: string[];
}

export interface JiraConfig {
jira_project_url: string;
comment_email_blacklist?: string[];
include_custom_fields: boolean;
labels_to_skip?: string[];
}

export interface SalesforceConfig {
Expand Down
Loading