Skip to content

Commit

Permalink
Merge branch 'tech_debt/openai_chat_completions'
Browse files Browse the repository at this point in the history
  • Loading branch information
Daggx committed Nov 23, 2023
2 parents 1546b1e + d800d3c commit 6f24942
Show file tree
Hide file tree
Showing 11 changed files with 305 additions and 375 deletions.
87 changes: 40 additions & 47 deletions edenai_apis/apis/openai/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,6 @@
from edenai_apis.utils.languages import get_language_name_from_code


def format_example_fn(x: List[List[str]]) -> str:
    """Render labelled examples as a block of prompt text.

    Args:
        x: Examples; element 0 of each is used as the text and element 1
            as the label.

    Returns:
        One "Text: ..." / "Text Classification: ..." stanza per example,
        each followed by a blank line. Newlines inside a text or label are
        replaced by spaces and surrounding whitespace is stripped. An empty
        input yields an empty string.
    """
    # str.join builds the result in one pass instead of repeated `+=`.
    return "".join(
        "Text: {text}\nText Classification: {label}\n\n".format(
            text=example[0].replace("\n", " ").strip(),
            label=example[1].replace("\n", " ").strip(),
        )
        for example in x
    )


def format_texts_fn(x: List[str]) -> str:
    """Render a list of texts as a block of prompt text.

    Args:
        x: Texts to render.

    Returns:
        One "Text: ..." line per input, each followed by a "-----"
        separator line. Newlines inside a text are replaced by spaces and
        surrounding whitespace is stripped. An empty input yields an empty
        string.
    """
    # str.join builds the result in one pass instead of repeated `+=`.
    return "".join(
        "Text: {text}\n-----\n".format(text=text.replace("\n", " ").strip())
        for text in x
    )


def construct_classification_instruction(
texts: list, labels: list, examples: list
) -> str:
Expand Down Expand Up @@ -66,24 +47,20 @@ def formatted_text_classification(data):


def construct_anonymization_context(text: str) -> str:
    """Build the prompt asking the model to redact personal information.

    Args:
        text: The text to analyze; it is embedded between triple backticks.

    Returns:
        The full prompt, including the list of allowed categories and a
        description of the expected JSON output.
    """
    # This is a plain (non-f) string, so braces are literal and must not be
    # doubled; doubling them would show "{{ ... }}" to the model.
    output_template = '{"redactedText" : "...", "entities": [{"content": entity, "label": category, "confidence_score": confidence score, "offset": start_offset}]}'
    prompt = f"""Please analyze the following text and identify any personal and sensitive information contained within it. For each instance of personal information, please provide a category and a confidence score. Categories could include, but should not be limited to, names, addresses, phone numbers, email addresses, social security numbers, enterprise name, any private information and credit card numbers. Use a confidence score between 0 and 1 to indicate how certain you are that the identified information is actually personal information.
The text is included between three backticks.
First write the redacted Text by replacing each character of identified entity with `*`, then extract the entity, finally extract the confidence Score between 0.0-1.0.
The category must be one of the following: "name", "address", "phonenumber", "email", "social security number", "organization", "credit card number", "other".
Your output should be a json that looks like this : {output_template}
The text:
```{text}```
Your output:
"""
    return prompt


def construct_keyword_extraction_context(text: str) -> str:
output_template = '{"items":[{"keyword": ... , "importance": ...}]}'
prompt = f"""
Expand All @@ -104,31 +81,39 @@ def construct_keyword_extraction_context(text: str) -> str:
def construct_translation_context(
    text: str, source_language: str, target_language: str
) -> str:
    """Build the prompt asking the model to translate ``text``.

    Args:
        text: The text to translate; it is embedded between triple backticks.
        source_language: Language code passed to
            ``get_language_name_from_code`` for the source language.
        target_language: Language code passed to
            ``get_language_name_from_code`` for the target language.

    Returns:
        The translation prompt.
    """
    # A single return: the superseded "###"-delimited prompt that used to
    # precede this one made the backtick-delimited prompt unreachable.
    source_name = get_language_name_from_code(source_language)
    target_name = get_language_name_from_code(target_language)
    return f"""
Translate the following text from {source_name} to {target_name}
The text is written between three backticks.
Text:
```{text}```
Your Output:
"""

def construct_language_detection_context(text: str) -> str:
    """Build the prompt asking the model for the ISO 639-1 code of ``text``.

    Args:
        text: The text whose language should be detected; it is embedded
            between triple backticks.

    Returns:
        The language-detection prompt.
    """
    # A single return: the earlier "###"-delimited prompt contained the
    # invalid escape sequence "\I" and shadowed this prompt.
    return f"""
Analyze the following text and determine the ISO 639-1 language code:
Please provide only the ISO 639-1 code as the output.
The text is written between three backticks.
```{text}```
"""


def construct_sentiment_analysis_context(text: str) -> str:
    """Build the prompt asking the model to label the sentiment of ``text``.

    Args:
        text: The text to label; it is embedded between triple backticks.

    Returns:
        The sentiment prompt, offering the labels 'Positive', 'Negative'
        and 'Neutral'.
    """
    # The label is spelled 'Positive' (it was misspelled 'Postive'), so the
    # model is not nudged into echoing a misspelled label.
    return f"""
Analyze the following text and label it with one of these sentiments 'Positive', 'Negative', 'Neutral'.
The text is written between three backticks.
```{text}```
"""


def construct_topic_extraction_context(text: str) -> str:
    """Build the prompt asking the model for the main taxonomy of ``text``.

    Args:
        text: The text to classify; it is embedded between triple backticks.

    Returns:
        The topic-extraction prompt.
    """
    # A single return: the superseded "###"-delimited prompt that used to
    # precede this one made the backtick-delimited prompt unreachable.
    return f"""
Analyze the following text and identify its main taxonomy.
The text is written between three backticks.
```{text}```
"""


def get_openapi_response(response: Response):
def construct_ner_instruction(text: str) -> str:
    """
    This function takes a text as input and returns a string that contains the instruction.

    Args:
        text: The text to extract entities from; it is embedded between
            triple backticks.

    Returns:
        The named-entity-recognition prompt, ending with a worked example
        of the expected output.
    """
    # Local import keeps this block self-contained.
    import json

    example_output = [
        {"Entity": "Barack Obama", "Type": "Person", "Confidence": 0.98},
        {"Entity": "44th President", "Type": "Position", "Confidence": 0.95},
        {"Entity": "United States", "Type": "Country", "Confidence": 0.99},
    ]
    # json.dumps renders the example as real JSON (double quotes). Formatting
    # the list directly would show Python's repr with single quotes, which
    # is not valid JSON for the model to imitate.
    example_json = json.dumps(example_output)
    return f"""Please extract named entities, their types, and assign a confidence score between 0 and 1 from the following text. The text is enclosed within triple backticks.
Input Text:
```{text}```
For example, if the input text is:
```Barack Obama was the 44th President of the United States.```
Your output should be in the following format:
```
{example_json}
```
"""


Expand Down Expand Up @@ -425,6 +417,7 @@ def convert_tts_audio_rate(audio_rate: int) -> float:
else:
return ((audio_rate - 0) / (100 - 0)) * (4 - 1) + 1


def finish_unterminated_json(json_string: str, end_brackets: str) -> str:
"""
take a cut json_string and try to terminate it
Expand Down
11 changes: 5 additions & 6 deletions edenai_apis/apis/openai/info.json
Original file line number Diff line number Diff line change
Expand Up @@ -128,13 +128,12 @@
],
"allow_null_language": true,
"models": [
"text-davinci-003",
"text-davinci-002",
"text-curie-001",
"text-babbage-001",
"text-ada-001"
"gpt-4",
"gpt-3.5-turbo-1106",
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k"
],
"default_model": "text-davinci-003"
"default_model": "gpt-3.5-turbo-1106"
},
"version": "v3.0.0"
},
Expand Down
Loading

0 comments on commit 6f24942

Please sign in to comment.