huggingface · Wauplin · Oct 10, 2023 · Oct 3, 2023 · Oct 4, 2023 · Oct 4, 2023
diff --git a/src/huggingface_hub/templates/datasetcard_template.md b/src/huggingface_hub/templates/datasetcard_template.md
@@ -1,103 +1,130 @@
 ---
-# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/datasetcard.md?plain=1
+# For reference on dataset card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/datasetcard.md?plain=1
 # Doc / guide: https://huggingface.co/docs/hub/datasets-cards
 {{ card_data }}
 ---
 
 # Dataset Card for {{ pretty_name | default("Dataset Name", true) }}
 
-## Dataset Description
+<!-- Provide a quick summary of the dataset. -->
 
-- **Homepage:** {{ homepage_url | default("", true)}}
-- **Repository:** {{ repo_url | default("", true)}}
-- **Paper:** {{ paper_url | default("", true)}}
-- **Leaderboard:** {{ leaderboard_url | default("", true)}}
-- **Point of Contact:** {{ point_of_contact | default("", true)}}
+{{ dataset_summary | default("", true) }}
 
-### Dataset Summary
+## Dataset Details
 
-{{ dataset_summary | default("[More Information Needed]", true)}}
+### Dataset Description
 
-### Supported Tasks and Leaderboards
+<!-- Provide a longer summary of what this dataset is. -->
 
-{{ supported_tasks_and_leaderboards_section | default("[More Information Needed]", true)}}
+{{ dataset_description | default("", true) }}
 
-### Languages
+- **Curated by:** {{ curators | default("[More Information Needed]", true)}}
+- **Shared by [optional]:** {{ shared_by | default("[More Information Needed]", true)}}
+- **Language(s) (NLP):** {{ language | default("[More Information Needed]", true)}}
+- **License:** {{ license | default("[More Information Needed]", true)}}
 
-{{ languages_section | default("[More Information Needed]", true)}}
+### Dataset Sources [optional]
 
-## Dataset Structure
+<!-- Provide the basic links for the dataset. -->
+
+- **Repository:** {{ repo | default("[More Information Needed]", true)}}
+- **Paper [optional]:** {{ paper | default("[More Information Needed]", true)}}
+- **Demo [optional]:** {{ demo | default("[More Information Needed]", true)}}
+
+## Uses
+
+<!-- Address questions around how the dataset is intended to be used. -->
 
-### Data Instances
+### Out-of-Scope Use
 
-{{ data_instances_section | default("[More Information Needed]", true)}}
+<!-- This section addresses misuse, malicious use, and uses that the dataset will not work well for. -->
 
-### Data Fields
+{{ out_of_scope_use | default("[More Information Needed]", true)}}
 
-{{ data_fields_section | default("[More Information Needed]", true)}}
+## Dataset Structure
 
-### Data Splits
+<!-- This section provides a description of the dataset fields, and additional information about the dataset structure such as criteria used to create the splits, relationships between data points, etc. -->
 
-{{ data_splits_section | default("[More Information Needed]", true)}}
+{{ dataset_structure | default("[More Information Needed]", true)}}
 
 ## Dataset Creation
 
 ### Curation Rationale
 
+<!-- Motivation for the creation of this dataset. -->
+
 {{ curation_rationale_section | default("[More Information Needed]", true)}}
 
 ### Source Data
 
+<!-- This section describes the source data (e.g. news text and headlines, social media posts, translated sentences, ...). -->
+
 #### Initial Data Collection and Normalization
 
+<!-- This section describes the data collection process such as data selection criteria, filtering methods, runtime information (tools and libraries), etc. -->
+
 {{ data_collection_section | default("[More Information Needed]", true)}}
 
-#### Who are the source language producers?
+#### Who are the source data producers?
 
-{{ source_language_producers_section | default("[More Information Needed]", true)}}
+<!-- This section describes the people or systems who originally created the data. It should also include self-reported demographic or identity information for the source data creators if this information is available. -->
 
-### Annotations
+{{ source_data_producers_section | default("[More Information Needed]", true)}}
+
+### Annotations [optional]
+
+<!-- If the dataset contains annotations which are not part of the initial data collection, use this section to describe them. -->
 
 #### Annotation process
 
+<!-- This section describes the annotation process such as annotation tools used in the process, the amount of data annotated, annotation guidelines provided to the annotators, interannotator statistics, annotation validation, etc. -->
+
 {{ annotation_process_section | default("[More Information Needed]", true)}}
 
 #### Who are the annotators?
 
+<!-- This section describes the people or systems who created the annotations. -->
+
 {{ who_are_annotators_section | default("[More Information Needed]", true)}}
 
-### Personal and Sensitive Information
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+{{ bias_risks_limitations | default("[More Information Needed]", true)}}
+
+### Recommendations
 
-{{ personal_and_sensitive_information_section | default("[More Information Needed]", true)}}
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
 
-## Considerations for Using the Data
+{{ bias_recommendations | default("Users should be made aware of the risks, biases and limitations of the dataset. More information needed for further recommendations.", true)}}
 
-### Social Impact of Dataset
+## Citation [optional]
 
-{{ social_impact_section | default("[More Information Needed]", true)}}
+<!-- If there is a paper or blog post introducing the dataset, the APA and BibTeX information for that should go in this section. -->
 
-### Discussion of Biases
+**BibTeX:**
 
-{{ discussion_of_biases_section | default("[More Information Needed]", true)}}
+{{ citation_bibtex | default("[More Information Needed]", true)}}
 
-### Other Known Limitations
+**APA:**
 
-{{ known_limitations_section | default("[More Information Needed]", true)}}
+{{ citation_apa | default("[More Information Needed]", true)}}
 
-## Additional Information
+## Glossary [optional]
 
-### Dataset Curators
+<!-- If relevant, include terms and calculations in this section that can help readers understand the dataset or dataset card. -->
 
-{{ dataset_curators_section | default("[More Information Needed]", true)}}
+{{ glossary | default("[More Information Needed]", true)}}
 
-### Licensing Information
+## More Information [optional]
 
-{{ licensing_information_section | default("[More Information Needed]", true)}}
+{{ more_information | default("[More Information Needed]", true)}}
 
-### Citation Information
+## Dataset Card Authors [optional]
 
-{{ citation_information_section | default("[More Information Needed]", true)}}
+{{ dataset_card_authors | default("[More Information Needed]", true)}}
 
-### Contributions
+## Dataset Card Contact
 
-{{ contributions_section | default("[More Information Needed]", true)}}
+{{ dataset_card_contact | default("[More Information Needed]", true)}}
diff --git a/src/huggingface_hub/templates/modelcard_template.md b/src/huggingface_hub/templates/modelcard_template.md
@@ -77,7 +77,7 @@ Use the code below to get started with the model.
 
 ### Training Data
 
-<!-- This should link to a Data Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
 
 {{ training_data | default("[More Information Needed]", true)}}
 
@@ -108,7 +108,7 @@ Use the code below to get started with the model.
 
 #### Testing Data
 
-<!-- This should link to a Data Card if possible. -->
+<!-- This should link to a Dataset Card if possible. -->
 
 {{ testing_data | default("[More Information Needed]", true)}}
 

diff --git a/tests/test_repocard.py b/tests/test_repocard.py
@@ -891,10 +891,8 @@ def test_dataset_card_from_default_template_with_template_variables(self):
         # Here we pass the card data as kwargs as well so template picks up pretty_name.
         card = DatasetCard.from_template(
             card_data,
-            homepage_url="https://huggingface.co",
-            repo_url="https://github.com/huggingface/huggingface_hub",
-            paper_url="https://arxiv.org/pdf/1910.03771.pdf",
-            point_of_contact="https://huggingface.co/nateraw",
+            repo="https://github.com/huggingface/huggingface_hub",
+            paper="https://arxiv.org/pdf/1910.03771.pdf",
             dataset_summary=(
                 "This is a test dataset card to check if the template variables "
                 "in the dataset card template are working."
@@ -903,8 +901,8 @@ def test_dataset_card_from_default_template_with_template_variables(self):
         self.assertTrue(card.text.strip().startswith("# Dataset Card for My Cool Dataset"))
         self.assertIsInstance(card, DatasetCard)
 
-        matches = re.findall(r"Homepage:\*\* https:\/\/huggingface\.co", str(card))
-        self.assertEqual(matches[0], "Homepage:** https://huggingface.co")
+        matches = re.findall(r"Repository:\*\* https://github\.com/huggingface/huggingface_hub", str(card))
+        self.assertEqual(matches[0], "Repository:** https://github.com/huggingface/huggingface_hub")
 
     @require_jinja
     def test_dataset_card_from_custom_template(self):