Skip to content

Commit

Permalink
New schema: Add chat schema (#679)
Browse files Browse the repository at this point in the history
* feat: add chat schema

* Update __init__.py

add whitespace at EoF

* Update chat.py

change the `role` field schema from ClassLabel to string
  • Loading branch information
patrickamadeus authored May 31, 2024
1 parent 4003dc0 commit 0665774
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 0 deletions.
4 changes: 4 additions & 0 deletions seacrowd/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
pairs_features_score,
pairs_multi_features,
qa_features,
chat_features,
image_features,
image_multi_features,
imqa_features,
Expand Down Expand Up @@ -106,6 +107,7 @@ class Tasks(Enum):
# Multi Text Generation
DIALOGUE_SYSTEM = "DS"
E2E_TASK_ORIENTED_DIALOGUE = "TOD"
MULTI_TURN_CONVERSATION = "MTC"

# Self Supervised & Unsupervised Text
PROMPTING = "PRT"
Expand Down Expand Up @@ -247,6 +249,7 @@ class Licenses(Enum):
Tasks.TOKEN_LEVEL_LANGUAGE_IDENTIFICATION: "SEQ_LABEL",
Tasks.COMMONSENSE_REASONING: "QA",
Tasks.QUESTION_ANSWERING: "QA",
Tasks.MULTI_TURN_CONVERSATION: "CHAT",
Tasks.QUESTION_ANSWERING_RETRIEVAL: "QA",
Tasks.CONCEPT_ALIGNMENT_CLASSIFICATION: "PAIRS",
Tasks.NEXT_SENTENCE_PREDICTION: "PAIRS",
Expand Down Expand Up @@ -315,6 +318,7 @@ class Licenses(Enum):
"KB": kb_features,
"TREE": tree_features,
"QA": qa_features,
"CHAT": chat_features,
"T2T": text2text_features,
"TEXT": text_features(),
"TEXT_MULTI": text_multi_features(),
Expand Down
2 changes: 2 additions & 0 deletions seacrowd/utils/schemas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .pairs import features_with_continuous_label as pairs_features_score
from .pairs_multilabel import features as pairs_multi_features
from .qa import features as qa_features
from .chat import features as chat_features
from .image import features as image_features
from .image import multi_features as image_multi_features
from .imqa import features as imqa_features
Expand All @@ -28,6 +29,7 @@
"pairs_features_score",
"pairs_multi_features",
"qa_features",
"chat_features",
"image_features",
"image_multi_features",
"imqa_features",
Expand Down
24 changes: 24 additions & 0 deletions seacrowd/utils/schemas/chat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""
Conversational Chat Schema
"""
import datasets

# Feature schema for chat-style examples: an identifier, a sequence of
# {role, content} string pairs as `input`, and a single string `output`.
features = datasets.Features(
    {
        "id": datasets.Value("string"),
        "input": datasets.Sequence(
            {
                "role": datasets.Value("string"),
                "content": datasets.Value("string"),
            }
        ),
        "output": datasets.Value("string"),
        # The schema of `meta` is deliberately left unspecified to allow
        # some flexibility. There are two ways to use it:
        #   1. keep it as an empty dict if it is of no use in
        #      `_generate_examples`, or
        #   2. in the dataloader's `_info` method, define `meta` as a dict
        #      mapping each intended meta column name to a
        #      `datasets.Features`-compatible type, then populate those
        #      values in the dataloader's `_generate_examples` method.
        "meta": {},
    }
)

0 comments on commit 0665774

Please sign in to comment.