Commit

Merge branch 'master' of github.com:edenai/edenai-apis
floflokie committed Nov 10, 2023
2 parents bd1905a + 5268d42 commit 7aa8a79
Showing 26 changed files with 1,270 additions and 50 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -24,3 +24,5 @@ media
.idea

pyrightconfig.json

*.env*
16 changes: 16 additions & 0 deletions AVAILABLES_FEATURES_AND_PROVIDERS.md
@@ -32,6 +32,7 @@
| **resume_parser** | affinda |
| | hireability |
| | klippa |
| | senseloaf |
| **custom_document_parsing_async** | amazon |
| **data_extraction** | amazon |
| | base64 |
@@ -63,6 +64,7 @@
| | clarifai |
| | cohere |
| | google |
| | mistral |
| | openai |
| **summarize** | alephalpha |
| | cohere |
@@ -620,6 +622,13 @@
| | invoice_parser |
| | receipt_parser |

</details>
<details><summary>mistral</summary>

| Features | Subfeatures |
|----------|-------------|
| **text** | generation |

</details>
<details><summary>modernmt</summary>

@@ -767,6 +776,13 @@
| | sentiment_analysis |
| | spell_check |

</details>
<details><summary>senseloaf</summary>

| Features | Subfeatures |
|----------|-------------|
| **ocr** | resume_parser |

</details>
<details><summary>sentisight</summary>

6 changes: 6 additions & 0 deletions edenai_apis/api_keys/mistral_settings_template.json
@@ -0,0 +1,6 @@
{
"user_id": "",
"app_id": "",
"key": ""
}

7 changes: 7 additions & 0 deletions edenai_apis/api_keys/senseloaf_settings_template.json
@@ -0,0 +1,7 @@
{
"api_key": "",
"email": "",
"password": "",
"comment": "You can either set your API Key, or your Login Credentials (Email and Password) for Authentication"
}

2 changes: 2 additions & 0 deletions edenai_apis/apis/__init__.py
@@ -56,5 +56,7 @@
from .winstonai import WinstonaiApi
from .vernai import VernaiApi
from .readyredact import ReadyRedactApi
from .senseloaf import SenseloafApi
from .mistral import MistralApi

# THIS NEEDS TO BE DONE AUTOMATICALLY
1 change: 1 addition & 0 deletions edenai_apis/apis/mistral/__init__.py
@@ -0,0 +1 @@
from .mistral_api import MistralApi
17 changes: 17 additions & 0 deletions edenai_apis/apis/mistral/errors.py
@@ -0,0 +1,17 @@
from edenai_apis.utils.exception import (
ProviderErrorLists,
ProviderInternalServerError,
ProviderTimeoutError,
)

# NOTE: error messages should be regex patterns
ERRORS: ProviderErrorLists = {
ProviderInternalServerError: [
r"Error calling Clarifai",
r"Failure",
],
ProviderTimeoutError: [
r"<[^<>]+debug_error_string = 'UNKNOWN:Error received from peer ipv4:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+ {created_time:'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[\+\-]\d{2}:\d{2}', grpc_status:14, grpc_message:'GOAWAY received'}'>",
r"Model is deploying"
]
}
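
The ERRORS mapping above ties regex patterns to edenai_apis exception classes. As an illustration only (the dispatch code is not part of this diff), a raw provider message could be resolved to an exception class roughly as follows; classify_error is a hypothetical helper, not an edenai_apis function:

import re

from edenai_apis.apis.mistral.errors import ERRORS

def classify_error(message: str):
    # Return the first exception class whose pattern list matches the message,
    # or None if nothing matches.
    for exception_class, patterns in ERRORS.items():
        if any(re.search(pattern, message) for pattern in patterns):
            return exception_class
    return None

# e.g. classify_error("Model is deploying") resolves to ProviderTimeoutError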
11 changes: 11 additions & 0 deletions edenai_apis/apis/mistral/info.json
@@ -0,0 +1,11 @@
{
"text": {
"generation": {
"constraints": {
"models":["mistral-7B-Instruct", "mistral-7B-OpenOrca","openHermes-2-mistral-7B"],
"default_model": "mistral-7B-Instruct"
},
"version": "v1"
}
}
}
87 changes: 87 additions & 0 deletions edenai_apis/apis/mistral/mistral_api.py
@@ -0,0 +1,87 @@
from typing import Dict
from google.protobuf.json_format import MessageToDict
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api.status import status_code_pb2

from edenai_apis.features import ProviderInterface, TextInterface
from edenai_apis.features.text.generation.generation_dataclass import (
GenerationDataClass,
)
from edenai_apis.loaders.data_loader import ProviderDataEnum
from edenai_apis.loaders.loaders import load_provider
from edenai_apis.utils.exception import ProviderException
from edenai_apis.utils.types import ResponseType



class MistralApi(ProviderInterface, TextInterface):
provider_name = "mistral"

def __init__(self, api_keys: Dict = {}) -> None:
self.api_settings = load_provider(
ProviderDataEnum.KEY, self.provider_name, api_keys=api_keys
)
self.user_id = self.api_settings["user_id"]
self.app_id = self.api_settings["app_id"]
self.key = self.api_settings["key"]

def __chat_markup_tokens(self, model):
if model == "mistral-7B-Instruct":
return "[INST]", "[/INST]"
else:
return "<|im_start|>", "<|im_end|>"

def text__generation(
self, text: str, temperature: float, max_tokens: int, model: str
) -> ResponseType[GenerationDataClass]:
start, end = self.__chat_markup_tokens(model)

text = f"{start} {text} {end}"

channel = ClarifaiChannel.get_grpc_channel()
stub = service_pb2_grpc.V2Stub(channel)

metadata = (("authorization", self.key),)
user_data_object = resources_pb2.UserAppIDSet(
user_id="mistralai", app_id="completion"
)

post_model_outputs_response = stub.PostModelOutputs(
service_pb2.PostModelOutputsRequest(
user_app_id=user_data_object,
model_id=model,
inputs=[
resources_pb2.Input(
data=resources_pb2.Data(text=resources_pb2.Text(raw=text))
)
],
),
metadata=metadata,
)

if post_model_outputs_response.status.code != status_code_pb2.SUCCESS:
raise ProviderException(
post_model_outputs_response.status.description,
code=post_model_outputs_response.status.code,
)

response = MessageToDict(
post_model_outputs_response, preserving_proto_field_name=True
)

output = response.get("outputs", [])
if len(output) == 0:
raise ProviderException(
"Mistral returned an empty response!",
code=post_model_outputs_response.status.code,
)

original_response = output[0].get("data", {}) or {}

return ResponseType[GenerationDataClass](
original_response=original_response,
standardized_response=GenerationDataClass(
generated_text=(original_response.get("text", {}) or {}).get("raw", "")
),
)
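
For context, a minimal usage sketch of the class above, assuming the mistral settings template has been filled in with valid Clarifai credentials (user_id, app_id, key); the prompt and sampling values are placeholders:

from edenai_apis.apis.mistral import MistralApi

provider = MistralApi()  # or MistralApi(api_keys={"user_id": "...", "app_id": "...", "key": "..."})
result = provider.text__generation(
    text="Introduce yourself in one sentence.",
    temperature=0.7,
    max_tokens=256,
    model="mistral-7B-Instruct",  # one of the models declared in info.json
)
print(result.standardized_response.generated_text)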
13 changes: 13 additions & 0 deletions edenai_apis/apis/mistral/outputs/text/generation_output.json
@@ -0,0 +1,13 @@
{
"original_response": {
"text": {
"raw": "party\n\nAI assistant: Hi there! I'm an AI language model, designed to assist and engage in conversations. My name is not \"who are you?\" but I'm here to help you with any questions or tasks you may have. How can I assist you today?",
"text_info": {
"encoding": "UnknownTextEnc"
}
}
},
"standardized_response": {
"generated_text": "party\n\nAI assistant: Hi there! I'm an AI language model, designed to assist and engage in conversations. My name is not \"who are you?\" but I'm here to help you with any questions or tasks you may have. How can I assist you today?"
}
}
88 changes: 44 additions & 44 deletions edenai_apis/apis/originalityai/originalityai_api.py
@@ -1,19 +1,25 @@
from http import HTTPStatus
import json
from collections import defaultdict
from http import HTTPStatus
from typing import Dict

import requests

from edenai_apis.features import TextInterface
from edenai_apis.features.provider.provider_interface import ProviderInterface
from edenai_apis.features.text.ai_detection.ai_detection_dataclass import (
AiDetectionDataClass,
AiDetectionItem,
)
from edenai_apis.features import TextInterface
from edenai_apis.features.text.plagia_detection.plagia_detection_dataclass import PlagiaDetectionCandidate, PlagiaDetectionDataClass, PlagiaDetectionItem
from edenai_apis.loaders.loaders import load_provider
from edenai_apis.features.text.plagia_detection.plagia_detection_dataclass import (
PlagiaDetectionCandidate,
PlagiaDetectionDataClass,
PlagiaDetectionItem,
)
from edenai_apis.loaders.data_loader import ProviderDataEnum
from edenai_apis.loaders.loaders import load_provider
from edenai_apis.utils.exception import ProviderException
from edenai_apis.utils.types import ResponseType
from collections import defaultdict


class OriginalityaiApi(ProviderInterface, TextInterface):
@@ -27,32 +33,29 @@ def __init__(self, api_keys: Dict = {}) -> None:
self.base_url = "https://api.originality.ai/api/v1/scan"

def text__plagia_detection(
self, text:str, title: str = ""
self, text: str, title: str = ""
) -> ResponseType[PlagiaDetectionDataClass]:

url = f"{self.base_url}/plag"
payload = {
"content" : text,
"title": title
}
headers = {
"content-type" : "application/json",
"X-OAI-API-KEY" : self.api_key
}
payload = {"content": text, "title": title}
headers = {"content-type": "application/json", "X-OAI-API-KEY": self.api_key}

response = requests.post(url, headers=headers, json=payload)

try:
response = requests.post(url, headers=headers, json = payload)
except Exception as excp:
raise ProviderException(str(excp), code = 500)

original_response = response.json()
if response.status_code > HTTPStatus.BAD_REQUEST:
original_response = response.json()
except json.JSONDecodeError as exc:
print(response.status_code)
print(response.text)
raise ProviderException(
original_response,
code= response.status_code
)

total_score = float(int(original_response["total_text_score"].replace("%", "")) / 100)
message="Internal Server Error", code=response.status_code
) from exc

if response.status_code != HTTPStatus.OK:
raise ProviderException(response.json(), code=response.status_code)

total_score = float(
int(original_response["total_text_score"].replace("%", "")) / 100
)

items = []
for result in original_response.get("results", []) or []:
@@ -62,22 +65,17 @@ def text__plagia_detection(
candidates.append(
PlagiaDetectionCandidate(
url=match["website"],
plagia_score= match["score"] / 100,
plagiarized_text= match["pText"]
plagia_score=match["score"] / 100,
plagiarized_text=match["pText"],
)
)
items.append(
PlagiaDetectionItem(
text= text,
candidates= candidates
)
)

items.append(PlagiaDetectionItem(text=text, candidates=candidates))

standardized_response = PlagiaDetectionDataClass(
plagia_score= total_score, items= items
plagia_score=total_score, items=items
)

#remove credits information from original response
# remove credits information from original response
original_response.pop("credits_used")
original_response.pop("credits")

@@ -87,8 +85,6 @@ def text__plagia_detection(
)

return result



def text__ai_detection(self, text: str) -> ResponseType[AiDetectionDataClass]:
url = f"{self.base_url}/ai"
@@ -102,12 +98,16 @@ def text__ai_detection(self, text: str) -> ResponseType[AiDetectionDataClass]:
}
response = requests.post(url=url, headers=headers, json=payload)

original_response = response.json()
try:
original_response = response.json()
except json.JSONDecodeError as exc:
raise ProviderException(
message="Internal Server Error", code=response.status_code
) from exc

if response.status_code != 200:
raise ProviderException(
original_response.get("error"),
code = response.status_code
original_response.get("error"), code=response.status_code
)

default_dict = defaultdict(lambda: None)
@@ -126,10 +126,10 @@ def text__ai_detection(self, text: str) -> ResponseType[AiDetectionDataClass]:
ai_score=original_response.get("score", defaultdict).get("ai"), items=items
)

#remove credits information from original response
# remove credits information from original response
original_response.pop("credits_used")
original_response.pop("credits")

result = ResponseType[AiDetectionDataClass](
original_response=original_response,
standardized_response=standardized_response,
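The originalityai changes above apply one pattern twice: guard response.json() so that a non-JSON error body surfaces as a ProviderException instead of an unhandled json.JSONDecodeError. A condensed sketch of that pattern (the helper name is illustrative, not part of this diff):

import json

import requests

from edenai_apis.utils.exception import ProviderException

def _safe_json(response: requests.Response) -> dict:
    # Treat a non-JSON body as a provider-side failure carrying the HTTP status code.
    try:
        return response.json()
    except json.JSONDecodeError as exc:
        raise ProviderException(
            message="Internal Server Error", code=response.status_code
        ) from exc
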
1 change: 1 addition & 0 deletions edenai_apis/apis/senseloaf/__init__.py
@@ -0,0 +1 @@
from .senseloaf_api import SenseloafApi