modelscope · DavdGao · May 9, 2024 · Apr 24, 2024 · Apr 29, 2024 · May 8, 2024
diff --git a/src/agentscope/service/web/search.py b/src/agentscope/service/web/search.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 """Search question in the web"""
-from typing import Any
-
+from typing import Any, Literal
 from agentscope.service.service_response import ServiceResponse
 from agentscope.utils.common import requests_get
 from agentscope.service.service_status import ServiceExecStatus
@@ -194,3 +193,370 @@ def google_search(
             for result in results
         ],
     )
+
+
+def dblp_search(
+    search_type: Literal["publication", "author", "venue"],
+    question: str,
+    num_results: int = 30,
+    first_hit: int = 0,
+    num_completion: int = 10,
+) -> ServiceResponse:
+    """
+    Search DBLP database based on the type specified.
+
+    Args:
+        search_type (`Literal["publication", "author", "venue"]`):
+            Type of search to perform, options are
+            "publication", "author", or "venue".
+        question (`str`):
+            The search query string.
+        num_results (`int`, defaults to `30`):
+            The total number of search results to fetch.
+        firts_hit (`int`, defaults to `0`):
+            The first hit in the numbered sequence of search results to return
+        num_completion (`int`, defaults to `10`):
+            The number of completions to generate for the search query.
+
+    Returns:
+        `ServiceResponse`: Depending on the type,
+        the response structure will vary.
+        The detailed documentation will adjust based on the type parameter.
+    """
+    mapping = {
+        "publication": dblp_search_publications,
+        "author": dblp_search_authors,
+        "venue": dblp_search_venues,
+    }
+    if search_type not in mapping:
+        raise ValueError(
+            f"Invalid type: {type}. Must be one of {list(mapping.keys())}.",
+        )
+    selected_function = mapping[search_type]
+    dblp_search.__doc__ = selected_function.__doc__
+    return selected_function(
+        question,
+        num_results,
+        first_hit,
+        num_completion,
+    )
+
+
+def dblp_search_publications(
+    question: str,
+    num_results: int = 30,
+    first_hit: int = 0,
+    num_completion: int = 10,
+) -> ServiceResponse:
+    """
+    Search publications in the DBLP database
+    via its public API and return structured
+    publication data.
+
+    Args:
+        question (`str`):
+            The search query string to look up
+            in the DBLP database.
+        num_results (`int`, defaults to `30`):
+            The number of search results to fetch.
+        firts_hit (`int`, defaults to `0`):
+            The first hit in the numbered sequence
+            of search results to return
+        num_completion (`int`, defaults to `10`):
+            The number of completions to generate
+            for the search query.
+
+    Returns:
+        `ServiceResponse`: A dictionary containing `status` and `content`.
+        The `status` attribute is from the ServiceExecStatus enum,
+        indicating success or error.
+        The `content` is a list of parsed publication data if successful,
+        or an error message if failed.
+        Each item in the list contains publication information
+        includes title, authors, venue, pages, year, type, DOI, and URL.
+
+    Example:
+        .. code-block:: python
+            search_results = dblp_search_publications(question="Extreme
+            Learning Machine",
+                                                      num_results=3,
+                                                      results_per_page=1,
+                                                      num_completion=1)
+            print(search_results)
+
+        It returns the following structure:
+
+        .. code-block:: python
+
+            {
+                'status': <ServiceExecStatus.SUCCESS: 1>,
+                'content': [
+                    {
+                        'title': 'Power transformers fault diagnosis
+                        based on a meta-learning approach to kernel
+                        extreme learning machine with opposition-based
+                        learning sparrow search algorithm.',
+                        'venue': 'J. Intell. Fuzzy Syst.',
+                        'pages': '455-466',
+                        'year': '2023',
+                        'type': 'Journal Articles',
+                        'doi': '10.3233/JIFS-211862',
+                        'url': 'https://dblp.org/rec/journals/jifs/YuTZTCH23',
+                        'authors': 'Song Yu, Weimin Tan, Chengming Zhang,
+                        Chao Tang, Lihong Cai, Dong Hu'
+                    },
+                    {
+                        'title': 'Performance comparison of Extreme Learning
+                        Machinesand other machine learning methods
+                        on WBCD data set.',
+                        'venue': 'SIU',
+                        'pages': '1-4',
+                        'year': '2021',
+                        'type': 'Conference and Workshop Papers',
+                        'doi': '10.1109/SIU53274.2021.9477984',
+                        'url': 'https://dblp.org/rec/conf/siu/KeskinDAY21',
+                        'authors': 'Ömer Selim Keskin, Akif Durdu,
+                        Muhammet Fatih Aslan, Abdullah Yusefi'
+                    }
+                ]
+            }
+    """
+
+    url = "https://dblp.org/search/publ/api"
+    params = {
+        "q": question,
+        "format": "json",
+        "h": num_results,
+        "f": first_hit,
+        "c": num_completion,
+    }
+    search_results = requests_get(url, params)
+
+    if isinstance(search_results, str):
+        return ServiceResponse(ServiceExecStatus.ERROR, search_results)
+
+    hits = search_results.get("result", {}).get("hits", {}).get("hit", [])
+    parsed_data = []
+    for hit in hits:
+        info = hit.get("info", {})
+        title = info.get("title", "No title available")
+        venue = info.get("venue", "No venue available")
+        pages = info.get("pages", "No page information")
+        year = info.get("year", "Year not specified")
+        pub_type = info.get("type", "Type not specified")
+        doi = info.get("doi", "No DOI available")
+        url = info.get("url", "No URL available")
+        authors = info.get("authors", {}).get("author", [])
+        authors_info = info.get("authors", {}).get("author", [])
+        if isinstance(
+            authors_info,
+            dict,
+        ):  # Check if there's only one author in a dict format
+            authors_info = [authors_info]
+        authors = ", ".join(
+            [author["text"] for author in authors_info if "text" in author],
+        )
+        data = {
+            "title": title,
+            "venue": venue,
+            "pages": pages,
+            "year": year,
+            "type": pub_type,
+            "doi": doi,
+            "url": url,
+            "authors": authors,
+        }
+        parsed_data.append(data)
+    return ServiceResponse(ServiceExecStatus.SUCCESS, parsed_data)
+
+
+def dblp_search_authors(
+    question: str,
+    num_results: int = 30,
+    first_hit: int = 0,
+    num_completion: int = 10,
+) -> ServiceResponse:
+    """
+    Search for author information in the DBLP database
+    via its public API and return structured author data.
+
+    Args:
+        question (`str`):
+            The search query string to look up
+            authors in the DBLP database.
+        num_results (`int`, defaults to `30`):
+            The total number of search results to fetch.
+        firts_hit (`int`, defaults to `0`):
+            The first hit in the numbered sequence
+            of search results to return
+        num_completion (`int`, defaults to `10`):
+            The number of completions to generate
+            for the search query.
+
+    Returns:
+        `ServiceResponse`: A dictionary containing `status` and `content`.
+        The `status` attribute is from the
+        ServiceExecStatus enum, indicating the success or error of the search.
+        The `content` is a list of parsed author
+        data if successful, or an error message if failed.
+        Each item in the list contains author information
+        including their name, URL, and affiliations.
+
+    Example:
+        .. code-block:: python
+
+            search_results = dblp_search_authors(question="Liu ZiWei",
+                                                 num_results=3,
+                                                 results_per_page=1,
+                                                 num_completion=1)
+            print(search_results)
+
+        It returns the following structure:
+
+        .. code-block:: python
+
+            {
+                'status': <ServiceExecStatus.SUCCESS: 1>,
+                'content': [
+                    {
+                        'author': 'Ziwei Liu 0001',
+                        'url': 'https://dblp.org/pid/05/6300-1',
+                        'affiliations': 'Advantech Singapore Pte Ltd,
+                        Singapore;
+                        National University of Singapore,
+                        Department of Computer Science, Singapore'
+                    },
+                    {
+                        'author': 'Ziwei Liu 0002',
+                        'url': 'https://dblp.org/pid/05/6300-2',
+                        'affiliations': 'Nanyang Technological University,
+                        S-Lab, Singapore;
+                        Chinese University of Hong Kong,
+                        Department of Information Engineering,
+                        Hong Kong'
+                    }
+                ]
+            }
+    """
+    url = "https://dblp.org/search/author/api"
+    params = {
+        "q": question,
+        "format": "json",
+        "h": num_results,
+        "f": first_hit,
+        "c": num_completion,
+    }
+    search_results = requests_get(url, params)
+    if isinstance(search_results, str):
+        return ServiceResponse(ServiceExecStatus.ERROR, search_results)
+    hits = search_results.get("result", {}).get("hits", {}).get("hit", [])
+    parsed_data = []
+    for hit in hits:
+        author = hit["info"]["author"]
+        author_url = hit["info"]["url"]
+        affiliations = []
+        notes = hit["info"].get("notes", {})
+        note_entries = notes.get("note", [])
+        if isinstance(note_entries, dict):
+            note_entries = [note_entries]
+        for note in note_entries:
+            if note["@type"] == "affiliation":
+                affiliations.append(note["text"])
+        affiliations = "; ".join(affiliations)
+        entry_dict = {
+            "author": author,
+            "url": author_url,
+            "affiliations": affiliations,
+        }
+        parsed_data.append(entry_dict)
+    return ServiceResponse(ServiceExecStatus.SUCCESS, parsed_data)
+
+
+def dblp_search_venues(
+    question: str,
+    num_results: int = 30,
+    first_hit: int = 0,
+    num_completion: int = 10,
+) -> ServiceResponse:
+    """
+    Search for venue information in the DBLP database
+    via its public API and return structured venue data.
+
+    Args:
+        question (`str`):
+            The search query string to look up venues in the DBLP database.
+        num_results (`int`, defaults to `30`):
+            The total number of search results to fetch.
+        firts_hit (`int`, defaults to `0`):
+            The first hit in the numbered sequence of search results to return
+        num_completion (`int`, defaults to `10`):
+            The number of completions to generate for the search query.
+
+    Returns:
+        `ServiceResponse`: A dictionary containing `status` and `content`.
+        The `status` attribute is from the ServiceExecStatus enum,
+        indicating the success or error of the search.
+        The `content` is a list of parsed venue data if successful,
+        or an error message if failed.
+        Each item in the list contains venue information including
+        its name, acronym, type, and URL.
+
+    Example:
+        .. code-block:: python
+
+            search_results = dblp_search_venues(question="AAAI",
+                                                 num_results=1,
+                                                 results_per_page=1,
+                                                 num_completion=1)
+            print(search_results)
+
+        It returns the following structure:
+
+        .. code-block:: python
+
+            {
+                'status': <ServiceExecStatus.SUCCESS: 1>,
+                'content': [
+                    {
+                        'venue': 'AAAI Conference on Artificial Intelligence
+                        (AAAI)',
+                        'acronym': 'AAAI',
+                        'type': 'Conference or Workshop',
+                        'url': 'https://dblp.org/db/conf/aaai/'
+                    },
+                    {
+                        'venue': ''AAAI Fall Symposium Series',
+                        'acronym': 'No acronym available',
+                        'type': 'Conference or Workshop',
+                        'url': 'https://dblp.org/db/conf/aaaifs/'
+                    }
+                ]
+            }
+    """
+    url = "https://dblp.org/search/venue/api"
+    params = {
+        "q": question,
+        "format": "json",
+        "h": num_results,
+        "f": first_hit,
+        "c": num_completion,
+    }
+    search_results = requests_get(url, params)
+    if isinstance(search_results, str):
+        return ServiceResponse(ServiceExecStatus.ERROR, search_results)
+
+    hits = search_results.get("result", {}).get("hits", {}).get("hit", [])
+    parsed_data = []
+    for hit in hits:
+        venue = hit["info"]["venue"]
+        acronym = hit["info"].get("acronym", "No acronym available")
+        venue_type = hit["info"].get("type", "Type not specified")
+        url = hit["info"]["url"]
+        entry_dict = {
+            "venue": venue,
+            "acronym": acronym,
+            "type": venue_type,
+            "url": url,
+        }
+        parsed_data.append(entry_dict)
+    return ServiceResponse(ServiceExecStatus.SUCCESS, parsed_data)