huggingface · albertvillanova · Sep 22, 2022 · Sep 21, 2022 · Sep 21, 2022
diff --git a/src/datasets/utils/metadata.py b/src/datasets/utils/metadata.py
@@ -41,20 +41,10 @@ def load_json_resource(resource: str) -> Tuple[Any, str]:
     return json.loads(content), f"{BASE_REF_URL}/resources/{resource}"
 
 
-def load_tsv_licenses(resource: str) -> Tuple[Any, str]:
-    content = pkg_resources.read_text(resources, resource)
-    licenses = {
-        line.split("\t")[1].strip(): line.split("\t")[0].strip() for line in content.splitlines() if "\t" in line
-    }
-    return licenses, f"{BASE_REF_URL}/resources/{resource}"
-
-
 # Source of languages.json:
 # https://datahub.io/core/language-codes/r/ietf-language-tags.csv
 # Language names were obtained with langcodes: https://github.com/LuminosoInsight/langcodes
 known_language_codes, known_language_codes_url = load_json_resource("languages.json")
-# standard_licenses.tsv is to be kept in sync with the same file in `moon-landing` and `hub-docs`
-known_licenses, known_licenses_url = load_tsv_licenses("standard_licenses.tsv")
 known_task_ids, known_task_ids_url = load_json_resource("tasks.json")
 known_creators, known_creators_url = load_json_resource("creators.json")
 known_size_categories, known_size_categories_url = load_json_resource("size_categories.json")
@@ -270,7 +260,6 @@ def validate(self):
         )
         self.language_creators, language_creators_errors = self.validate_language_creators(self.language_creators)
         self.language, language_errors = self.validate_language_codes(self.language or self.languages)
-        self.license, license_errors = self.validate_licences(self.license or self.licenses)
         self.multilinguality, multilinguality_errors = self.validate_mulitlinguality(self.multilinguality)
         self.size_categories, size_categories_errors = self.validate_size_catgeories(self.size_categories)
         self.source_datasets, source_datasets_errors = self.validate_source_datasets(self.source_datasets)
@@ -284,7 +273,6 @@ def validate(self):
         errors = {
             "annotations_creators": annotations_creators_errors,
             "language_creators": language_creators_errors,
-            "license": license_errors,
             "multilinguality": multilinguality_errors,
             "size_categories": size_categories_errors,
             "source_datasets": source_datasets_errors,
@@ -394,16 +382,6 @@ def validate_language_codes(languages: Union[List[str], Dict[str, List[str]]]) -
             lambda lang: lang == "unknown",
         )
 
-    @staticmethod
-    def validate_licences(licenses: Union[List[str], Dict[str, List[str]]]) -> ValidatorOutput:
-        validated, error = tagset_validator(
-            licenses,
-            list(known_licenses.keys()),
-            "license",
-            known_licenses_url,
-        )
-        return validated, error
-
     @staticmethod
     def validate_task_categories(task_categories: Union[List[str], Dict[str, List[str]]]) -> ValidatorOutput:
         # TODO: we're currently ignoring all values starting with 'other' as our task taxonomy is bound to change

diff --git a/src/datasets/utils/resources/standard_licenses.tsv b/src/datasets/utils/resources/standard_licenses.tsv