-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
replace deprecated confluence group api endpoint #3197
Changes from all commits
6d26d0b
95ab63b
4fc196f
31f4a68
e2aaa60
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,15 +3,13 @@ | |
from typing import Any | ||
from urllib.parse import quote | ||
|
||
from atlassian import Confluence # type: ignore | ||
|
||
from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_LABELS_TO_SKIP | ||
from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE | ||
from danswer.configs.app_configs import INDEX_BATCH_SIZE | ||
from danswer.configs.constants import DocumentSource | ||
from danswer.connectors.confluence.onyx_confluence import build_confluence_client | ||
from danswer.connectors.confluence.onyx_confluence import OnyxConfluence | ||
from danswer.connectors.confluence.utils import attachment_to_content | ||
from danswer.connectors.confluence.utils import build_confluence_client | ||
from danswer.connectors.confluence.utils import build_confluence_document_id | ||
from danswer.connectors.confluence.utils import datetime_from_string | ||
from danswer.connectors.confluence.utils import extract_text_from_confluence_html | ||
|
@@ -114,25 +112,10 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None | |
# see https://github.com/atlassian-api/atlassian-python-api/blob/master/atlassian/rest_client.py | ||
# for a list of other hidden constructor args | ||
self._confluence_client = build_confluence_client( | ||
credentials_json=credentials, | ||
credentials=credentials, | ||
is_cloud=self.is_cloud, | ||
wiki_base=self.wiki_base, | ||
) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: moved this to the onyx_confluence.py file. |
||
client_without_retries = Confluence( | ||
api_version="cloud" if self.is_cloud else "latest", | ||
url=self.wiki_base.rstrip("/"), | ||
username=credentials["confluence_username"] if self.is_cloud else None, | ||
password=credentials["confluence_access_token"] if self.is_cloud else None, | ||
token=credentials["confluence_access_token"] if not self.is_cloud else None, | ||
) | ||
spaces = client_without_retries.get_all_spaces(limit=1) | ||
if not spaces: | ||
raise RuntimeError( | ||
f"No spaces found at {self.wiki_base}! " | ||
"Check your credentials and wiki_base and make sure " | ||
"is_cloud is set correctly." | ||
) | ||
return None | ||
|
||
def _get_comment_string_for_page_id(self, page_id: str) -> str: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -269,20 +269,3 @@ def datetime_from_string(datetime_string: str) -> datetime: | |
datetime_object = datetime_object.astimezone(timezone.utc) | ||
|
||
return datetime_object | ||
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: moved this to the onyx_confluence.py file. |
||
def build_confluence_client( | ||
credentials_json: dict[str, Any], is_cloud: bool, wiki_base: str | ||
) -> OnyxConfluence: | ||
return OnyxConfluence( | ||
api_version="cloud" if is_cloud else "latest", | ||
# Remove trailing slash from wiki_base if present | ||
url=wiki_base.rstrip("/"), | ||
# passing in username causes issues for Confluence data center | ||
username=credentials_json["confluence_username"] if is_cloud else None, | ||
password=credentials_json["confluence_access_token"] if is_cloud else None, | ||
token=credentials_json["confluence_access_token"] if not is_cloud else None, | ||
backoff_and_retry=True, | ||
max_backoff_retries=10, | ||
max_backoff_seconds=60, | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,5 @@ | ||
from atlassian import Confluence # type: ignore | ||
|
||
from danswer.connectors.confluence.onyx_confluence import build_confluence_client | ||
from danswer.connectors.confluence.onyx_confluence import OnyxConfluence | ||
from danswer.connectors.confluence.utils import build_confluence_client | ||
from danswer.connectors.confluence.utils import get_user_email_from_username__server | ||
from danswer.db.models import ConnectorCredentialPair | ||
from danswer.utils.logger import setup_logger | ||
|
@@ -11,68 +9,51 @@ | |
logger = setup_logger() | ||
|
||
|
||
def _get_group_members_email_paginated( | ||
def _build_group_member_email_map( | ||
confluence_client: OnyxConfluence, | ||
group_name: str, | ||
) -> set[str]: | ||
group_member_emails: set[str] = set() | ||
for member in confluence_client.paginated_group_members_retrieval(group_name): | ||
email = member.get("email") | ||
) -> dict[str, set[str]]: | ||
group_member_emails: dict[str, set[str]] = {} | ||
for user_result in confluence_client.paginated_cql_user_retrieval(): | ||
user = user_result["user"] | ||
email = user.get("email") | ||
if not email: | ||
user_name = member.get("username") | ||
# This field is only present in Confluence Server | ||
user_name = user.get("username") | ||
# If it is present, try to get the email using a Server-specific method | ||
if user_name: | ||
email = get_user_email_from_username__server( | ||
confluence_client=confluence_client, | ||
user_name=user_name, | ||
) | ||
if email: | ||
group_member_emails.add(email) | ||
if not email: | ||
# If we still don't have an email, skip this user | ||
continue | ||
|
||
for group in confluence_client.paginated_groups_by_user_retrieval(user): | ||
# group name uniqueness is enforced by Confluence, so we can use it as a group ID | ||
group_id = group["name"] | ||
group_member_emails.setdefault(group_id, set()).add(email) | ||
|
||
return group_member_emails | ||
|
||
|
||
def confluence_group_sync( | ||
cc_pair: ConnectorCredentialPair, | ||
) -> list[ExternalUserGroup]: | ||
credentials = cc_pair.credential.credential_json | ||
is_cloud = cc_pair.connector.connector_specific_config.get("is_cloud", False) | ||
wiki_base = cc_pair.connector.connector_specific_config["wiki_base"] | ||
|
||
# test connection with direct client, no retries | ||
confluence_client = Confluence( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: moved this to the onyx_confluence.py file. |
||
api_version="cloud" if is_cloud else "latest", | ||
url=wiki_base.rstrip("/"), | ||
username=credentials["confluence_username"] if is_cloud else None, | ||
password=credentials["confluence_access_token"] if is_cloud else None, | ||
token=credentials["confluence_access_token"] if not is_cloud else None, | ||
) | ||
spaces = confluence_client.get_all_spaces(limit=1) | ||
if not spaces: | ||
raise RuntimeError(f"No spaces found at {wiki_base}!") | ||
|
||
confluence_client = build_confluence_client( | ||
credentials_json=credentials, | ||
is_cloud=is_cloud, | ||
wiki_base=wiki_base, | ||
credentials=cc_pair.credential.credential_json, | ||
is_cloud=cc_pair.connector.connector_specific_config.get("is_cloud", False), | ||
wiki_base=cc_pair.connector.connector_specific_config["wiki_base"], | ||
) | ||
|
||
# Get all group names | ||
group_names: list[str] = [] | ||
for group in confluence_client.paginated_groups_retrieval(): | ||
if group_name := group.get("name"): | ||
group_names.append(group_name) | ||
|
||
# For each group name, get all members and create a danswer group | ||
group_member_email_map = _build_group_member_email_map( | ||
confluence_client=confluence_client, | ||
) | ||
danswer_groups: list[ExternalUserGroup] = [] | ||
for group_name in group_names: | ||
group_member_emails = _get_group_members_email_paginated( | ||
confluence_client, group_name | ||
) | ||
if not group_member_emails: | ||
continue | ||
for group_id, group_member_emails in group_member_email_map.items(): | ||
danswer_groups.append( | ||
ExternalUserGroup( | ||
id=group_name, | ||
id=group_id, | ||
user_emails=list(group_member_emails), | ||
) | ||
) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
individually control group sync periods based on source