diff --git a/requirements.txt b/requirements.txt index 85e087add2b..4f8709fb974 100644 --- a/requirements.txt +++ b/requirements.txt @@ -88,6 +88,7 @@ nodeenv==1.7.0 numba==0.56.4 numpy==1.23.3 openai==0.27.8 +opencv-python==4.8.1.78 openpyxl==3.0.10 outcome==1.2.0 packaging==21.3 diff --git a/src/helm/benchmark/presentation/create_plots.py b/src/helm/benchmark/presentation/create_plots.py index 395b28fcc8f..39841679ac2 100644 --- a/src/helm/benchmark/presentation/create_plots.py +++ b/src/helm/benchmark/presentation/create_plots.py @@ -10,9 +10,10 @@ import numpy as np from scipy.stats import pearsonr +from helm.benchmark.config_registry import register_builtin_configs_from_helm_package from helm.common.hierarchical_logger import hlog from helm.common.optional_dependencies import handle_module_not_found_error -from helm.benchmark.presentation.schema import read_schema, SCHEMA_CLASSIC_YAML_FILENAME +from helm.benchmark.model_metadata_registry import MODEL_NAME_TO_MODEL_METADATA from helm.benchmark.presentation.summarize import AGGREGATE_WIN_RATE_COLUMN try: @@ -133,9 +134,6 @@ def __init__(self, base_path: str, save_path: str, plot_format: str): self.plot_format = plot_format self._tables_cache: Dict[str, Dict[str, Table]] = {} - schema = read_schema(SCHEMA_CLASSIC_YAML_FILENAME) - self.model_metadata = {model_field.display_name: model_field for model_field in schema.models} - def get_group_tables(self, group_name: str) -> Dict[str, Table]: """Reads and parses group tables. 
Uses _tables_cache to avoid reprocessing the same table multiple times.""" if group_name in self._tables_cache: @@ -338,14 +336,14 @@ def create_all_accuracy_v_model_property_plots(self): def get_model_release_date(model_name: str) -> Optional[date]: """Maps a model name to the month of model release.""" - release_date = self.model_metadata[model_name].release_date + release_date = MODEL_NAME_TO_MODEL_METADATA[model_name].release_date if release_date is None: return None return release_date.replace(day=1) def get_model_size(model_name: str) -> Optional[int]: """Maps a model name to the number of parameters, rounding to the nearest leading digit.""" - size = self.model_metadata[model_name].num_parameters + size = MODEL_NAME_TO_MODEL_METADATA[model_name].num_parameters if size is None: return None grain = 10 ** (len(str(size)) - 1) @@ -401,7 +399,9 @@ def create_accuracy_v_access_bar_plot(self): for i, access_level in enumerate(access_levels): model_indices: List[int] = [ - idx for idx, model in enumerate(table.adapters) if self.model_metadata[model].access == access_level + idx + for idx, model in enumerate(table.adapters) + if MODEL_NAME_TO_MODEL_METADATA[model].access == access_level ] best_model_index = model_indices[table.mean_win_rates[model_indices].argmax()] @@ -611,6 +611,7 @@ def main(): parser.add_argument("--suite", type=str, help="Name of the suite that we are plotting", required=True) parser.add_argument("--plot-format", help="Format for saving plots", default="png", choices=["png", "pdf"]) args = parser.parse_args() + register_builtin_configs_from_helm_package() base_path = os.path.join(args.output_path, "runs", args.suite) if not os.path.exists(os.path.join(base_path, "groups")): hlog(f"ERROR: Could not find `groups` directory under {base_path}. 
Did you run `summarize.py` first?") diff --git a/src/helm/benchmark/presentation/schema.py b/src/helm/benchmark/presentation/schema.py index 1b030148148..0635dac4375 100644 --- a/src/helm/benchmark/presentation/schema.py +++ b/src/helm/benchmark/presentation/schema.py @@ -1,5 +1,4 @@ from dataclasses import dataclass, field -from datetime import date from typing import List, Optional, Dict import dacite import mako.template @@ -46,34 +45,6 @@ def get_short_display_name(self) -> str: return name -# Note: also see Model from `models.py`. -@dataclass(frozen=True) -class ModelField(Field): - # Organization that originally created the model (e.g. "EleutherAI") - # Note that this may be different from group or the prefix of the model `name` - # ("together" in "together/gpt-j-6b") as the hosting organization - # may be different from the creator organization. We also capitalize - # this field properly to later display in the UI. - # TODO: in the future, we want to cleanup the naming in the following ways: - # - make the creator_organization an identifier with a separate display name - # - have a convention like / - creator_organization: Optional[str] = None - - # How this model is available (e.g., limited) - access: Optional[str] = None - - # Whether we have yet to evaluate this model - todo: bool = False - - # When was the model released - release_date: Optional[date] = None - - # The number of parameters - # This should be a string as the number of parameters is usually a round number (175B), - # but we set it as an int for plotting purposes. 
- num_parameters: Optional[int] = None - - @dataclass(frozen=True) class MetricNameMatcher: """ @@ -222,9 +193,6 @@ class RunGroup(Field): class Schema: """Specifies information about what to display on the frontend.""" - # Models - models: List[ModelField] - # Adapter fields (e.g., temperature) adapter: List[Field] @@ -241,7 +209,6 @@ class Schema: run_groups: List[RunGroup] def __post_init__(self): - self.name_to_model = {model.name: model for model in self.models} self.name_to_metric = {metric.name: metric for metric in self.metrics} self.name_to_perturbation = {perturbation.name: perturbation for perturbation in self.perturbations} self.name_to_metric_group = {metric_group.name: metric_group for metric_group in self.metric_groups} diff --git a/src/helm/benchmark/presentation/summarize.py b/src/helm/benchmark/presentation/summarize.py index d52c39020d8..295d15660f6 100644 --- a/src/helm/benchmark/presentation/summarize.py +++ b/src/helm/benchmark/presentation/summarize.py @@ -28,7 +28,6 @@ write, ensure_directory_exists, asdict_without_nones, - serialize_dates, parallel_map, singleton, unique_simplification, @@ -47,6 +46,7 @@ from helm.benchmark.presentation.schema import ( MetricNameMatcher, RunGroup, + Field, read_schema, SCHEMA_CLASSIC_YAML_FILENAME, BY_GROUP, @@ -62,7 +62,7 @@ ) from helm.benchmark.config_registry import register_builtin_configs_from_helm_package, register_configs_from_directory from helm.benchmark.presentation.run_display import write_run_display_json -from helm.benchmark.model_metadata_registry import ModelMetadata, get_model_metadata +from helm.benchmark.model_metadata_registry import ModelMetadata, get_model_metadata, get_all_models OVERLAP_N_COUNT = 13 @@ -172,7 +172,7 @@ def get_model_metadata_for_adapter_spec(adapter_spec: AdapterSpec) -> ModelMetad except ValueError: pass - # Return a placeholder "unknoown model" model metadata. + # Return a placeholder "unknown model" model metadata. 
return get_unknown_model_metadata(adapter_spec.model) @@ -433,11 +433,61 @@ def group_runs(self): self.group_adapter_to_runs[group_name][adapter_spec].append(run) self.group_scenario_adapter_to_runs[group_name][scenario_spec][adapter_spec].append(run) - def write_schema(self): + @dataclass(frozen=True) + class _ModelField(Field): + """The frontend version of ModelMetadata. + + The frontend expects schema.json to contain a field under "models" that contains a list of `ModelField`s. + + All attributes have the same meaning as in ModelMetadata.""" + + # TODO: Migrate frontend to use ModelMetadata instead of ModelField and delete this. + creator_organization: Optional[str] = None + access: Optional[str] = None + todo: bool = False + release_date: Optional[str] = None + num_parameters: Optional[int] = None + + def get_model_field_dicts(self) -> List[Dict]: + """Get a list of `ModelField` dicts that will be written to schema.json. + + The frontend expects schema.json to contain a field under "models" that contains a list of `ModelField`s. + + This is populated by reading the `ModelMetadata` configs and filtering down to models that were + actually used, and converting each `ModelMetadata` to a `ModelField`.""" + # TODO: Migrate frontend to use ModelMetadata instead of ModelField and delete this. 
+ used_model_names: Set[str] = set() + for run in self.runs: + used_model_names.add(get_model_metadata_for_adapter_spec(run.run_spec.adapter_spec).name) + + model_field_dicts: List[Dict] = [] + for model_name in get_all_models(): + if model_name not in used_model_names: + continue + model_metadata = get_model_metadata(model_name) + model_field = Summarizer._ModelField( + name=model_metadata.name, + display_name=model_metadata.display_name, + short_display_name=model_metadata.display_name, + description=model_metadata.description, + creator_organization=model_metadata.creator_organization_name, + access=model_metadata.access, + todo=False, + release_date=model_metadata.release_date.isoformat() if model_metadata.release_date else None, + num_parameters=model_metadata.num_parameters, + ) + model_field_dicts.append(asdict_without_nones(model_field)) + return model_field_dicts + + def write_schema(self) -> None: """Write the schema file to benchmark_output so the frontend knows about it.""" + # Manually add the model metadata to the schema.json, where the frontend expects it. + # TODO: Move model metadata out of schema.json into its own model_metadata.json file. + raw_schema = asdict_without_nones(self.schema) + raw_schema["models"] = self.get_model_field_dicts() write( os.path.join(self.run_release_path, "schema.json"), - json.dumps(asdict_without_nones(self.schema), indent=2, default=serialize_dates), + json.dumps(raw_schema, indent=2), ) def read_runs(self): @@ -921,10 +971,10 @@ def run_spec_names_to_url(run_spec_names: List[str]) -> str: adapter_specs: List[AdapterSpec] = list(adapter_to_runs.keys()) if sort_by_model_order: - # Sort models by the order defined in the schema. - # Models not defined in the schema will be sorted alphabetically and - # placed before models in defined the schema. - model_order = [model.name for model in self.schema.models] + # Sort models by the order defined in the model metadata config. 
+ # Models not defined in the model metadata config will be sorted alphabetically and + # placed before models defined in the model metadata config. + model_order = get_all_models() def _adapter_spec_sort_key(spec): index = model_order.index(spec.model_deployment) if spec.model_deployment in model_order else -1 @@ -1304,8 +1354,6 @@ def symlink_latest(self) -> None: def run_pipeline(self, skip_completed: bool, num_instances: int) -> None: """Run the entire summarization pipeline.""" - self.write_schema() - self.read_runs() self.group_runs() self.check_metrics_defined() @@ -1320,6 +1368,10 @@ def run_pipeline(self, skip_completed: bool, num_instances: int) -> None: # because it uses self.scenario_spec_instance_id_dict self.read_overlap_stats() + # Must happen after self.read_runs() + # because it uses self.runs + self.write_schema() + self.write_executive_summary() self.write_runs() self.write_run_specs() diff --git a/src/helm/benchmark/static/schema_classic.yaml b/src/helm/benchmark/static/schema_classic.yaml index ef6d76e76c6..1885b145fd5 100644 --- a/src/helm/benchmark/static/schema_classic.yaml +++ b/src/helm/benchmark/static/schema_classic.yaml @@ -1,1068 +1,4 @@ --- -############################################################ -models: - # AI21 Labs - - name: ai21/j1-jumbo - display_name: J1-Jumbo v1 (178B) - description: Jurassic-1 Jumbo (178B parameters) ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)). - creator_organization: AI21 Labs - access: limited - num_parameters: 178000000000 - release_date: 2021-08-11 - - name: ai21/j1-large - display_name: J1-Large v1 (7.5B) - description: Jurassic-1 Large (7.5B parameters) ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)). 
- creator_organization: AI21 Labs - access: limited - num_parameters: 7500000000 - release_date: 2021-08-11 - - name: ai21/j1-grande - display_name: J1-Grande v1 (17B) - description: Jurassic-1 Grande (17B parameters) with a "few tweaks" to the training process ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)). - creator_organization: AI21 Labs - access: limited - num_parameters: 17000000000 - release_date: 2022-05-03 - - name: ai21/j1-grande-v2-beta - display_name: J1-Grande v2 beta (17B) - description: Jurassic-1 Grande v2 beta (17B parameters) - creator_organization: AI21 Labs - access: limited - num_parameters: 17000000000 - release_date: 2022-10-28 - - name: ai21/j2-jumbo - display_name: Jurassic-2 Jumbo (178B) - description: Jurassic-2 Jumbo (178B parameters) ([docs](https://www.ai21.com/blog/introducing-j2)) - creator_organization: AI21 Labs - access: limited - num_parameters: 178000000000 - release_date: 2023-03-09 - - name: ai21/j2-grande - display_name: Jurassic-2 Grande (17B) - description: Jurassic-2 Grande (17B parameters) ([docs](https://www.ai21.com/blog/introducing-j2)) - creator_organization: AI21 Labs - access: limited - num_parameters: 17000000000 - release_date: 2023-03-09 - - name: ai21/j2-large - display_name: Jurassic-2 Large (7.5B) - description: Jurassic-2 Large (7.5B parameters) ([docs](https://www.ai21.com/blog/introducing-j2)) - creator_organization: AI21 Labs - access: limited - num_parameters: 7500000000 - release_date: 2023-03-09 - - # Aleph Alpha - # TODO: add Luminous World when it's released - - name: AlephAlpha/luminous-base - display_name: Luminous Base (13B) - description: Luminous Base (13B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/)) - creator_organization: Aleph Alpha - access: limited - num_parameters: 13000000000 - # TODO: get exact release date - 
release_date: 2022-01-01 - - name: AlephAlpha/luminous-extended - display_name: Luminous Extended (30B) - description: Luminous Extended (30B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/)) - creator_organization: Aleph Alpha - access: limited - num_parameters: 30000000000 - release_date: 2022-01-01 - - name: AlephAlpha/luminous-supreme - display_name: Luminous Supreme (70B) - description: Luminous Supreme (70B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/)) - creator_organization: Aleph Alpha - access: limited - num_parameters: 70000000000 - release_date: 2022-01-01 - - # TODO: Remove Once we have configurable model names - - name: neurips/local - display_name: Local service - description: Local competition service - creator_organization: neurips - access: open - num_parameters: 1 - release_date: 2021-12-01 - - - # Anthropic - - name: anthropic/stanford-online-all-v4-s3 - display_name: Anthropic-LM v4-s3 (52B) - description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf). - creator_organization: Anthropic - access: closed - num_parameters: 52000000000 - release_date: 2021-12-01 - - name: anthropic/claude-2.0 - display_name: Anthropic Claude 2.0 - description: Claude 2.0 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf)) - creator_organization: Anthropic - access: limited - release_date: 2023-07-11 - - name: anthropic/claude-2.1 - display_name: Anthropic Claude 2.1 - description: Claude 2.1 is a general purpose large language model developed by Anthropic. 
It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf)) - creator_organization: Anthropic - access: limited - release_date: 2023-11-21 - - name: anthropic/claude-v1.3 - display_name: Anthropic Claude v1.3 - description: A model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)). - creator_organization: Anthropic - access: limited - release_date: 2023-03-17 - - name: anthropic/claude-instant-v1 - display_name: Anthropic Claude Instant V1 - description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)). - creator_organization: Anthropic - access: limited - release_date: 2023-03-17 - - name: anthropic/claude-instant-1.2 - display_name: Anthropic Claude Instant 1.2 - description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)). - creator_organization: Anthropic - access: limited - release_date: 2023-08-09 - - # Berkeley - - name: together/koala-13b - display_name: Koala (13B) - description: Koala (13B) is a chatbot fine-tuned from Llama (13B) on dialogue data gathered from the web. ([blog post](https://bair.berkeley.edu/blog/2023/04/03/koala/)) - creator_organization: UC Berkeley - access: open - num_parameters: 13000000000 - release_date: 2022-04-03 - todo: true - - # BigScience - - name: together/bloom - display_name: BLOOM (176B) - description: BLOOM (176B parameters) is an autoregressive model trained on 46 natural languages and 13 programming languages ([paper](https://arxiv.org/pdf/2211.05100.pdf)). 
- creator_organization: BigScience - access: open - num_parameters: 176000000000 - release_date: 2022-06-28 - - name: together/bloomz - display_name: BLOOMZ (176B) - description: BLOOMZ (176B parameters) is BLOOM that has been fine-tuned on natural language instructions ([details](https://huggingface.co/bigscience/bloomz)). - creator_organization: BigScience - access: open - num_parameters: 176000000000 - release_date: 2022-11-03 - todo: true - - name: together/t0pp - display_name: T0pp (11B) - description: T0pp (11B parameters) is an encoder-decoder model trained on a large set of different tasks specified in natural language prompts ([paper](https://arxiv.org/pdf/2110.08207.pdf)). - creator_organization: BigScience - access: open - num_parameters: 11000000000 - release_date: 2021-10-15 - - # BigCode - - name: huggingface/santacoder - display_name: SantaCoder (1.1B) - description: SantaCoder (1.1B parameters) model trained on the Python, Java, and JavaScript subset of The Stack (v1.1) ([model card](https://huggingface.co/bigcode/santacoder)). - creator_organization: BigCode - access: open - - name: huggingface/starcoder - display_name: StarCoder (15.5B) - description: The StarCoder (15.5B parameter) model trained on 80+ programming languages from The Stack (v1.2) ([model card](https://huggingface.co/bigcode/starcoder)). - creator_organization: BigCode - access: open - - # Hugging Face - - name: huggingface/gpt2 - display_name: GPT-2 (124M) - description: GPT-2 is a transformers model pretrained on a very large corpus of English data in a self-supervised fashion. This means it was pretrained on the raw texts only, with no humans labelling them in any way (which is why it can use lots of publicly available data) with an automatic process to generate inputs and labels from those texts. 
- creator_organization: OpenAI - access: open - num_parameters: 124000000 - - name: huggingface/gpt2-medium - display_name: GPT-2 Medium (355M) - description: GPT-2 Medium is the 355M parameter version of GPT-2, a transformer-based language model created and released by OpenAI. The model is a pretrained model on English language using a causal language modeling (CLM) objective. - creator_organization: OpenAI - access: open - num_parameters: 355000000 - - name: huggingface/gpt2-large - display_name: GPT-2 Large (774M) - description: GPT-2 Large is the 774M parameter version of GPT-2, a transformer-based language model created and released by OpenAI. The model is a pretrained model on English language using a causal language modeling (CLM) objective. - creator_organization: OpenAI - access: open - num_parameters: 774000000 - - name: huggingface/gpt2-xl - display_name: GPT-2 XL (1.5B) - description: GPT-2 XL is the 1.5B parameter version of GPT-2, a transformer-based language model created and released by OpenAI. The model is a pretrained model on English language using a causal language modeling (CLM) objective. - creator_organization: OpenAI - access: open - num_parameters: 1500000000 - - # HuggignfaceM4 - - name: HuggingFaceM4/idefics-9b - display_name: IDEFICS (9B) - description: IDEFICS (9B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics)) - creator_organization: HuggingFace - access: open - num_parameters: 9000000000 - release_date: 2023-08-22 - - name: HuggingFaceM4/idefics-9b-instruct - display_name: IDEFICS instruct (9B) - description: IDEFICS instruct (9B parameters) is an open-source model based on DeepMind's Flamingo. 
([blog](https://huggingface.co/blog/idefics)) - creator_organization: HuggingFace - access: open - num_parameters: 9000000000 - release_date: 2023-08-22 - - name: HuggingFaceM4/idefics-80b - display_name: IDEFICS (80B) - description: IDEFICS (80B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics)) - creator_organization: HuggingFace - access: open - num_parameters: 80000000000 - release_date: 2023-08-22 - - name: HuggingFaceM4/idefics-80b-instruct - display_name: IDEFICS instruct (80B) - description: IDEFICS instruct (80B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics)) - creator_organization: HuggingFace - access: open - num_parameters: 80000000000 - release_date: 2023-08-22 - - # Cerebras Systems - - name: together/cerebras-gpt-6.7b - display_name: Cerebras GPT (6.7B) - description: Cerebras GPT is a family of open compute-optimal language models scaled from 111M to 13B parameters trained on the Eleuther Pile. ([paper](https://arxiv.org/pdf/2304.03208.pdf)) - creator_organization: Cerebras - access: limited - num_parameters: 6700000000 - release_date: 2023-04-06 - todo: true - - name: together/cerebras-gpt-13b - display_name: Cerebras GPT (13B) - description: Cerebras GPT is a family of open compute-optimal language models scaled from 111M to 13B parameters trained on the Eleuther Pile. 
([paper](https://arxiv.org/pdf/2304.03208.pdf)) - creator_organization: Cerebras - access: limited - num_parameters: 13000000000 - release_date: 2023-04-06 - todo: true - - # Cohere - - name: cohere/xlarge-20220609 - display_name: Cohere xlarge v20220609 (52.4B) - description: Cohere xlarge v20220609 (52.4B parameters) - creator_organization: Cohere - access: limited - num_parameters: 52400000000 - release_date: 2022-06-09 - - name: cohere/large-20220720 - display_name: Cohere large v20220720 (13.1B) - description: Cohere large v20220720 (13.1B parameters), which is deprecated by Cohere as of December 2, 2022. - creator_organization: Cohere - access: limited - num_parameters: 13100000000 - release_date: 2022-07-20 - - name: cohere/medium-20220720 - display_name: Cohere medium v20220720 (6.1B) - description: Cohere medium v20220720 (6.1B parameters) - creator_organization: Cohere - access: limited - num_parameters: 6100000000 - release_date: 2022-07-20 - - name: cohere/small-20220720 - display_name: Cohere small v20220720 (410M) - description: Cohere small v20220720 (410M parameters), which is deprecated by Cohere as of December 2, 2022. 
- creator_organization: Cohere - access: limited - num_parameters: 410000000 - release_date: 2022-07-20 - - name: cohere/xlarge-20221108 - display_name: Cohere xlarge v20221108 (52.4B) - description: Cohere xlarge v20221108 (52.4B parameters) - creator_organization: Cohere - access: limited - num_parameters: 52400000000 - release_date: 2022-11-08 - - name: cohere/medium-20221108 - display_name: Cohere medium v20221108 (6.1B) - description: Cohere medium v20221108 (6.1B parameters) - creator_organization: Cohere - access: limited - num_parameters: 6100000000 - release_date: 2022-11-08 - - name: cohere/command-medium-beta - display_name: Cohere Command beta (6.1B) - description: Cohere Command beta (6.1B parameters) is fine-tuned from the medium model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)). - creator_organization: Cohere - access: limited - num_parameters: 6100000000 - release_date: 2022-11-08 - - name: cohere/command-xlarge-beta - display_name: Cohere Command beta (52.4B) - description: Cohere Command beta (52.4B parameters) is fine-tuned from the XL model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)). - creator_organization: Cohere - access: limited - num_parameters: 52400000000 - release_date: 2022-11-08 - - name: cohere/command - display_name: Cohere Command - description: Command is Cohere’s flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications. [docs](https://docs.cohere.com/reference/generate) and [changelog](https://docs.cohere.com/changelog) - creator_organization: Cohere - access: limited - release_date: 2023-09-29 - - name: cohere/command-light - display_name: Cohere Command Light - description: Command is Cohere’s flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications. 
[docs](https://docs.cohere.com/reference/generate) and [changelog](https://docs.cohere.com/changelog) - creator_organization: Cohere - access: limited - release_date: 2023-09-29 - - # Databricks - - name: databricks/dolly-v2-3b - display_name: Dolly V2 (3B) - description: Dolly V2 (3B) is an instruction-following large language model trained on the Databricks machine learning platform. It is based on pythia-12b. - creator_organization: Databricks - access: open - num_parameters: 2517652480 - release_date: 2023-04-12 - todo: true - - name: databricks/dolly-v2-7b - display_name: Dolly V2 (7B) - description: Dolly V2 (7B) is an instruction-following large language model trained on the Databricks machine learning platform. It is based on pythia-12b. - creator_organization: Databricks - access: open - num_parameters: 6444163072 - release_date: 2023-04-12 - todo: true - - name: databricks/dolly-v2-12b - display_name: Dolly V2 (12B) - description: Dolly V2 (12B) is an instruction-following large language model trained on the Databricks machine learning platform. It is based on pythia-12b. - creator_organization: Databricks - access: open - num_parameters: 11327027200 - release_date: 2023-04-12 - todo: true - - # DeepMind - - name: deepmind/gopher - display_name: Gopher (280B) - description: Gopher (540B parameters) ([paper](https://arxiv.org/pdf/2112.11446.pdf)). - creator_organization: DeepMind - access: closed - todo: true - - name: deepmind/chinchilla - display_name: Chinchilla (70B) - description: Chinchilla (70B parameters) ([paper](https://arxiv.org/pdf/2203.15556.pdf)). - creator_organization: DeepMind - access: closed - todo: true - - # EleutherAI - - name: together/gpt-j-6b - display_name: GPT-J (6B) - description: GPT-J (6B parameters) autoregressive language model trained on The Pile ([details](https://arankomatsuzaki.wordpress.com/2021/06/04/gpt-j/)). 
- creator_organization: EleutherAI - access: open - num_parameters: 6000000000 - release_date: 2021-06-04 - - name: together/gpt-neox-20b - display_name: GPT-NeoX (20B) - description: GPT-NeoX (20B parameters) autoregressive language model trained on The Pile ([paper](https://arxiv.org/pdf/2204.06745.pdf)). - creator_organization: EleutherAI - access: open - num_parameters: 20000000000 - release_date: 2022-02-02 - - name: eleutherai/pythia-1b-v0 - display_name: Pythia (1B) - description: Pythia (1B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers. - creator_organization: EleutherAI - access: open - num_parameters: 805736448 - release_date: 2023-02-13 - todo: true - - name: eleutherai/pythia-2.8b-v0 - display_name: Pythia (2.8B) - description: Pythia (2.8B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers. - creator_organization: EleutherAI - access: open - num_parameters: 2517652480 - release_date: 2023-02-13 - todo: true - - name: eleutherai/pythia-6.9b - display_name: Pythia (6.9B) - description: Pythia (6.9B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers. - creator_organization: EleutherAI - access: open - num_parameters: 6444163072 - release_date: 2023-02-13 - - name: eleutherai/pythia-12b-v0 - display_name: Pythia (12B) - description: Pythia (12B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers. 
- creator_organization: EleutherAI - access: open - num_parameters: 11327027200 - release_date: 2023-02-13 - - # Google - - name: together/t5-11b - display_name: T5 (11B) - description: T5 (11B parameters) is an encoder-decoder model trained on a multi-task mixture, where each task is converted into a text-to-text format ([paper](https://arxiv.org/pdf/1910.10683.pdf)). - creator_organization: Google - access: open - num_parameters: 11000000000 - release_date: 2019-10-23 - - name: together/ul2 - display_name: UL2 (20B) - description: UL2 (20B parameters) is an encoder-decoder model trained on the C4 corpus. It's similar to T5 but trained with a different objective and slightly different scaling knobs ([paper](https://arxiv.org/pdf/2205.05131.pdf)). - creator_organization: Google - access: open - num_parameters: 20000000000 - release_date: 2022-05-10 - - name: together/flan-t5-xxl - display_name: Flan-T5 (11B) - description: Flan-T5 (11B parameters) is T5 fine-tuned on 1.8K tasks ([paper](https://arxiv.org/pdf/2210.11416.pdf)). - creator_organization: Google - access: open - - name: google/palm - display_name: PaLM (540B) - description: Pathways Language Model (540B parameters) is trained using 6144 TPU v4 chips ([paper](https://arxiv.org/pdf/2204.02311.pdf)). - creator_organization: Google - access: closed - todo: true - ## PaLM 2 - - name: google/text-bison@001 - display_name: PaLM-2 (Bison) - description: The best value PaLM model. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf)) - creator_organization: Google - access: limited - release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions - - name: google/text-bison-32k - display_name: PaLM-2 (Bison) - description: The best value PaLM model with a 32K context. 
PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf)) - creator_organization: Google - access: limited - release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions - - name: google/text-unicorn@001 - display_name: PaLM-2 (Unicorn) - description: The largest model in PaLM family. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf)) - creator_organization: Google - access: limited - release_date: 2023-11-30 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions - - name: google/code-bison@001 - display_name: Codey PaLM-2 (Bison) - description: A model fine-tuned to generate code based on a natural language description of the desired code. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf)) - creator_organization: Google - access: limited - release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions - - name: google/code-bison-32k - display_name: Codey PaLM-2 (Bison) - description: Codey with a 32K context. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. 
([report](https://arxiv.org/pdf/2305.10403.pdf)) - creator_organization: Google - access: limited - release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions - - # HazyResearch - - name: together/h3-2.7b - display_name: H3 (2.7B) - description: H3 (2.7B parameters) is a decoder-only language model based on state space models ([paper](https://arxiv.org/abs/2212.14052)). - creator_organization: HazyResearch - access: open - num_parameters: 2700000000 - release_date: 2023-01-23 - todo: true - - # Lightning AI's Lit-GPT - - name: lightningai/lit-gpt - display_name: Lit-GPT - description: Lit-GPT is an optimized collection of open-source LLMs for finetuning and inference. It supports – Falcon, Llama 2, Vicuna, LongChat, and other top-performing open-source large language models. - creator_organization: Lightning AI - access: open - num_parameters: 1 - release_date: 2023-04-04 - - - # Meta - - name: together/opt-iml-175b - display_name: OPT-IML (175B) - description: OPT-IML (175B parameters) is a suite of decoder-only transformer LMs that are multi-task fine-tuned on 2000 datasets ([paper](https://arxiv.org/pdf/2212.12017.pdf)). - creator_organization: Meta - access: open - num_parameters: 175000000000 - release_date: 2022-12-22 - todo: true - - - name: together/opt-iml-30b - display_name: OPT-IML (30B) - description: OPT-IML (30B parameters) is a suite of decoder-only transformer LMs that are multi-task fine-tuned on 2000 datasets ([paper](https://arxiv.org/pdf/2212.12017.pdf)). - creator_organization: Meta - access: open - num_parameters: 30000000000 - release_date: 2022-12-22 - todo: true - - - name: together/opt-175b - display_name: OPT (175B) - description: Open Pre-trained Transformers (175B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)). 
- creator_organization: Meta - access: open - num_parameters: 175000000000 - release_date: 2022-05-02 - - - name: together/opt-66b - display_name: OPT (66B) - description: Open Pre-trained Transformers (66B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)). - creator_organization: Meta - access: open - num_parameters: 66000000000 - release_date: 2022-05-02 - - - name: together/opt-6.7b - display_name: OPT (6.7B) - description: Open Pre-trained Transformers (6.7B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)). - creator_organization: Meta - access: open - num_parameters: 6700000000 - release_date: 2022-05-02 - - - name: together/opt-1.3b - display_name: OPT (1.3B) - description: Open Pre-trained Transformers (1.3B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)). - creator_organization: Meta - access: open - num_parameters: 1300000000 - release_date: 2022-05-02 - - - name: together/galactica-120b - display_name: Galactica (120B) - description: Galactica (120B parameters) is trained on 48 million papers, textbooks, lectures notes, compounds and proteins, scientific websites, etc. ([paper](https://galactica.org/static/paper.pdf)). - creator_organization: Meta - access: open - num_parameters: 120000000000 - release_date: 2022-11-15 - todo: true - - - name: together/galactica-30b - display_name: Galactica (30B) - description: Galactica (30B parameters) is trained on 48 million papers, textbooks, lectures notes, compounds and proteins, scientific websites, etc. ([paper](https://galactica.org/static/paper.pdf)). 
- creator_organization: Meta - access: open - num_parameters: 30000000000 - release_date: 2022-11-15 - todo: true - - name: meta/llama-7b - display_name: LLaMA (7B) - description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters. - creator_organization: Meta - access: open - num_parameters: 7000000000 - release_date: 2023-02-24 - - name: meta/llama-13b - display_name: LLaMA (13B) - description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters. - creator_organization: Meta - access: open - num_parameters: 13000000000 - release_date: 2023-02-24 - - name: meta/llama-30b - display_name: LLaMA (30B) - description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters. - creator_organization: Meta - access: open - num_parameters: 30000000000 - release_date: 2023-02-24 - - name: meta/llama-65b - display_name: LLaMA (65B) - description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters. - creator_organization: Meta - access: open - num_parameters: 65000000000 - release_date: 2023-02-24 - - name: meta/llama-2-7b - display_name: Llama 2 (7B) - description: Llama 2 pretrained models are trained on 2 trillion tokens, and have double the context length than Llama 1. - creator_organization: Meta - access: open - num_parameters: 7000000000 - release_date: 2023-07-18 - - name: meta/llama-2-13b - display_name: Llama 2 (13B) - description: Llama 2 pretrained models are trained on 2 trillion tokens, and have double the context length than Llama 1. - creator_organization: Meta - access: open - num_parameters: 13000000000 - release_date: 2023-07-18 - - name: meta/llama-2-70b - display_name: Llama 2 (70B) - description: Llama 2 pretrained models are trained on 2 trillion tokens, and have double the context length than Llama 1. 
- creator_organization: Meta - access: open - num_parameters: 70000000000 - release_date: 2023-07-18 - - # Stability AI - - name: stabilityai/stablelm-base-alpha-3b - display_name: StableLM-Base-Alpha (3B) - description: StableLM-Base-Alpha is a suite of 3B and 7B parameter decoder-only language models pre-trained on a diverse collection of English datasets with a sequence length of 4096 to push beyond the context window limitations of existing open-source language models. - creator_organization: Stability AI - access: open - num_parameters: 3000000000 - release_date: 2023-04-20 - todo: true - - - name: stabilityai/stablelm-base-alpha-7b - display_name: StableLM-Base-Alpha (7B) - description: StableLM-Base-Alpha is a suite of 3B and 7B parameter decoder-only language models pre-trained on a diverse collection of English datasets with a sequence length of 4096 to push beyond the context window limitations of existing open-source language models. - creator_organization: Stability AI - access: open - num_parameters: 7000000000 - release_date: 2023-04-20 - todo: true - - # Stanford - - name: stanford/alpaca-7b - display_name: Alpaca (7B) - description: Alpaca 7B is a model fine-tuned from the LLaMA 7B model on 52K instruction-following demonstrations - creator_organization: Stanford - access: open - num_parameters: 7000000000 - release_date: 2023-03-13 - - # LMSYS - - name: lmsys/vicuna-7b-v1.3 - display_name: Vicuna v1.3 (7B) - description: Vicuna v1.3 (7B) is an open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. - creator_organization: LMSYS - access: open - num_parameters: 7000000000 - release_date: 2023-06-22 - - name: lmsys/vicuna-13b-v1.3 - display_name: Vicuna v1.3 (13B) - description: Vicuna v1.3 (13B) is an open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. 
- creator_organization: LMSYS - access: open - num_parameters: 13000000000 - release_date: 2023-06-22 - - # 01.AI - - name: 01-ai/yi-6b - display_name: Yi (6B) - description: The Yi models are large language models trained from scratch by developers at 01.AI. - creator_organization: 01.AI - access: open - num_parameters: 6000000000 - release_date: 2023-11-02 - - name: 01-ai/yi-34b - display_name: Yi (34B) - description: The Yi models are large language models trained from scratch by developers at 01.AI. - creator_organization: 01.AI - access: open - num_parameters: 34000000000 - release_date: 2023-11-02 - - # Mistral AI - - name: mistralai/mistral-7b-v0.1 - display_name: Mistral v0.1 (7B) - description: Mistral 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA). - creator_organization: Mistral AI - access: open - num_parameters: 7300000000 - release_date: 2023-09-27 - - # Microsoft/NVIDIA - - name: microsoft/TNLGv2_530B - display_name: TNLG v2 (530B) - description: TNLG v2 (530B parameters) autoregressive language model trained on a filtered subset of the Pile and CommonCrawl ([paper](https://arxiv.org/pdf/2201.11990.pdf)). - creator_organization: Microsoft/NVIDIA - access: closed - num_parameters: 530000000000 - release_date: 2022-01-28 - - name: microsoft/TNLGv2_7B - display_name: TNLG v2 (6.7B) - description: TNLG v2 (6.7B parameters) autoregressive language model trained on a filtered subset of the Pile and CommonCrawl ([paper](https://arxiv.org/pdf/2201.11990.pdf)). - creator_organization: Microsoft/NVIDIA - access: closed - num_parameters: 6700000000 - release_date: 2022-01-28 - - # OpenAI: https://beta.openai.com/docs/engines/gpt-3 - - name: openai/davinci - display_name: davinci (175B) - description: Original GPT-3 (175B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)). 
- creator_organization: OpenAI - access: limited - num_parameters: 175000000000 - release_date: 2020-05-28 - - name: openai/curie - display_name: curie (6.7B) - description: Original GPT-3 (6.7B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)). - creator_organization: OpenAI - access: limited - num_parameters: 6700000000 - release_date: 2020-05-28 - - name: openai/babbage - display_name: babbage (1.3B) - description: Original GPT-3 (1.3B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)). - creator_organization: OpenAI - access: limited - num_parameters: 1300000000 - release_date: 2020-05-28 - - name: openai/ada - display_name: ada (350M) - description: Original GPT-3 (350M parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)). - creator_organization: OpenAI - access: limited - num_parameters: 350000000 - release_date: 2020-05-28 - - name: openai/text-davinci-003 - display_name: text-davinci-003 - description: text-davinci-003 model that involves reinforcement learning (PPO) with reward models. Derived from text-davinci-002 ([docs](https://beta.openai.com/docs/model-index-for-researchers)). - creator_organization: OpenAI - access: limited - num_parameters: 175000000000 - release_date: 2022-11-28 - - name: openai/text-davinci-002 - display_name: text-davinci-002 - description: text-davinci-002 model that involves supervised fine-tuning on human-written demonstrations. Derived from code-davinci-002 ([docs](https://beta.openai.com/docs/model-index-for-researchers)). 
- creator_organization: OpenAI - access: limited - num_parameters: 175000000000 - release_date: 2022-01-27 - - name: openai/text-davinci-001 - display_name: text-davinci-001 - description: text-davinci-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)). - creator_organization: OpenAI - access: limited - num_parameters: 175000000000 - release_date: 2022-01-27 - todo: true - - name: openai/text-curie-001 - display_name: text-curie-001 - description: text-curie-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)). - creator_organization: OpenAI - access: limited - num_parameters: 6700000000 - release_date: 2022-01-27 - - name: openai/text-babbage-001 - display_name: text-babbage-001 - description: text-babbage-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)). - creator_organization: OpenAI - access: limited - num_parameters: 1300000000 - release_date: 2022-01-27 - - name: openai/text-ada-001 - display_name: text-ada-001 - description: text-ada-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)). - creator_organization: OpenAI - access: limited - num_parameters: 350000000 - release_date: 2022-01-27 - - name: openai/gpt-4-0314 - display_name: gpt-4-0314 - description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 from March 14th 2023. 
- creator_organization: OpenAI - access: limited - release_date: 2023-03-14 - - name: openai/gpt-4-32k-0314 - display_name: gpt-4-32k-0314 - description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 with a longer context length of 32,768 tokens from March 14th 2023. - creator_organization: OpenAI - access: limited - release_date: 2023-03-14 - - name: openai/gpt-4-0613 - display_name: gpt-4-0613 - description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 from 2023-06-13. - creator_organization: OpenAI - access: limited - release_date: 2023-06-13 - - name: openai/gpt-4-32k-0613 - display_name: gpt-4-32k-0613 - description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 with a longer context length of 32,768 tokens from 2023-06-13. - creator_organization: OpenAI - access: limited - release_date: 2023-06-13 - - name: openai/code-davinci-002 - display_name: code-davinci-002 - description: Codex-style model that is designed for pure code-completion tasks ([docs](https://beta.openai.com/docs/models/codex)). - creator_organization: OpenAI - access: limited - - name: openai/code-davinci-001 - display_name: code-davinci-001 - description: code-davinci-001 model - creator_organization: OpenAI - access: limited - todo: true - - name: openai/code-cushman-001 - display_name: code-cushman-001 (12B) - description: Codex-style model that is a stronger, multilingual version of the Codex (12B) model in the [Codex paper](https://arxiv.org/pdf/2107.03374.pdf). 
- creator_organization: OpenAI - access: limited - - name: openai/gpt-3.5-turbo-0301 - display_name: gpt-3.5-turbo-0301 - description: Sibling model of text-davinci-003 is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-03-01. - creator_organization: OpenAI - access: limited - release_date: 2023-03-01 - - name: openai/gpt-3.5-turbo-0613 - display_name: gpt-3.5-turbo-0613 - description: Sibling model of text-davinci-003 is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13. - creator_organization: OpenAI - access: limited - release_date: 2023-06-13 - - name: openai/gpt-3.5-turbo-16k-0613 - display_name: gpt-3.5-turbo-16k-0613 - description: Sibling model of text-davinci-003 is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13 with a longer context length of 16,384 tokens. - creator_organization: OpenAI - access: limited - release_date: 2023-06-13 - - name: openai/gpt-4-1106-preview - display_name: gpt-4-1106-preview - description: GPT-4 Turbo (preview) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Preview snapshot from November 6, 2023. - creator_organization: OpenAI - access: limited - release_date: 2023-11-06 - - # Together - - name: together/Together-gpt-JT-6B-v1 - display_name: GPT-JT (6B) - description: GPT-JT (6B parameters) is a fork of GPT-J ([blog post](https://www.together.xyz/blog/releasing-v1-of-gpt-jt-powered-by-open-source-ai)). - creator_organization: Together - access: open - num_parameters: 6700000000 - release_date: 2022-11-29 - todo: true - - name: together/gpt-neoxt-chat-base-20b - display_name: GPT-NeoXT-Chat-Base (20B) - description: GPT-NeoXT-Chat-Base (20B) is fine-tuned from GPT-NeoX, serving as a base model for developing open-source chatbots. 
- creator_organization: Together - access: open - num_parameters: 20000000000 - release_date: 2023-03-08 - todo: true - - name: together/redpajama-incite-base-3b-v1 - display_name: RedPajama-INCITE-Base-v1 (3B) - description: RedPajama-INCITE-Base-v1 (3B parameters) is a 3 billion base model that aims to replicate the LLaMA recipe as closely as possible. - creator_organization: Together - access: open - num_parameters: 3000000000 - release_date: 2023-05-05 - - name: together/redpajama-incite-instruct-3b-v1 - display_name: RedPajama-INCITE-Instruct-v1 (3B) - description: RedPajama-INCITE-Instruct-v1 (3B parameters) is a model fine-tuned for few-shot applications on the data of GPT-JT. It is built from RedPajama-INCITE-Base-v1 (3B), a 3 billion base model that aims to replicate the LLaMA recipe as closely as possible. - creator_organization: Together - access: open - num_parameters: 3000000000 - release_date: 2023-05-05 - todo: true - - name: together/redpajama-incite-chat-3b-v1 - display_name: RedPajama-INCITE-Chat-v1 (3B) - description: RedPajama-INCITE-Chat-v1 (3B parameters) is a model fine-tuned on OASST1 and Dolly2 to enhance chatting ability. It is built from RedPajama-INCITE-Base-v1 (3B), a 3 billion base model that aims to replicate the LLaMA recipe as closely as possible. - creator_organization: Together - access: open - num_parameters: 3000000000 - release_date: 2023-05-05 - todo: true - - name: together/redpajama-incite-base-7b - display_name: RedPajama-INCITE-Base (7B) - description: RedPajama-INCITE-Base (7B parameters) is a 7 billion base model that aims to replicate the LLaMA recipe as closely as possible. - creator_organization: Together - access: open - num_parameters: 7000000000 - release_date: 2023-05-05 - todo: true - - name: together/redpajama-incite-instruct-7b - display_name: RedPajama-INCITE-Instruct (7B) - description: RedPajama-INCITE-Instruct (7B parameters) is a model fine-tuned for few-shot applications on the data of GPT-JT. 
It is built from RedPajama-INCITE-Base (7B), a 7 billion base model that aims to replicate the LLaMA recipe as closely as possible. - creator_organization: Together - access: open - num_parameters: 7000000000 - release_date: 2023-05-05 - todo: true - - # MosaicML - - name: mosaicml/mpt-7b - display_name: MPT (7B) - description: MPT (7B) is a Transformer trained from scratch on 1T tokens of text and code. - creator_organization: MosaicML - access: open - num_parameters: 6700000000 - release_date: 2023-05-05 - - name: mosaicml/mpt-7b-chat - display_name: MPT-Chat (7B) - description: MPT-Chat (7B) is a chatbot-like model for dialogue generation. It is built by finetuning MPT (30B) , a Transformer trained from scratch on 1T tokens of text and code. - creator_organization: MosaicML - access: open - num_parameters: 6700000000 - release_date: 2023-05-05 - todo: true - - name: mosaicml/mpt-instruct-7b - display_name: MPT-Instruct (7B) - description: MPT-Instruct (7B) is a model for short-form instruction following. It is built by finetuning MPT (30B), a Transformer trained from scratch on 1T tokens of text and code. - creator_organization: MosaicML - access: open - num_parameters: 6700000000 - release_date: 2023-05-05 - - name: mosaicml/mpt-30b - display_name: MPT (30B) - description: MPT (30B) is a Transformer trained from scratch on 1T tokens of text and code. - creator_organization: MosaicML - access: open - num_parameters: 30000000000 - release_date: 2023-06-22 - - name: mosaicml/mpt-30b-chat - display_name: MPT-Chat (30B) - description: MPT-Chat (30B) is a chatbot-like model for dialogue generation. It is built by finetuning MPT (30B), a Transformer trained from scratch on 1T tokens of text and code. - creator_organization: MosaicML - access: open - num_parameters: 30000000000 - release_date: 2023-06-22 - todo: true - - name: mosaicml/mpt-instruct-30b - display_name: MPT-Instruct (30B) - description: MPT-Instruct (30B) is a model for short-form instruction following. 
It is built by finetuning MPT (30B), a Transformer trained from scratch on 1T tokens of text and code. - creator_organization: MosaicML - access: open - num_parameters: 30000000000 - release_date: 2023-06-22 - - # TII UAE - - name: tiiuae/falcon-7b - display_name: Falcon (7B) - description: Falcon-7B is a 7B parameters causal decoder-only model built by TII and trained on 1,500B tokens of RefinedWeb enhanced with curated corpora. - creator_organization: TII UAE - access: open - num_parameters: 7000000000 - release_date: 2023-03-15 - - name: tiiuae/falcon-7b-instruct - display_name: Falcon-Instruct (7B) - description: Falcon-7B-Instruct is a 7B parameters causal decoder-only model built by TII based on Falcon-7B and finetuned on a mixture of chat/instruct datasets. - creator_organization: TII UAE - access: open - num_parameters: 7000000000 - release_date: 2023-03-15 - - name: tiiuae/falcon-40b - display_name: Falcon (40B) - description: Falcon-40B is a 40B parameters causal decoder-only model built by TII and trained on 1,500B tokens of RefinedWeb enhanced with curated corpora. - creator_organization: TII UAE - access: open - num_parameters: 40000000000 - release_date: 2023-05-25 - - name: tiiuae/falcon-40b-instruct - display_name: Falcon-Instruct (40B) - description: Falcon-40B-Instruct is a 40B parameters causal decoder-only model built by TII based on Falcon-7B and finetuned on a mixture of chat/instruct datasets. - creator_organization: TII UAE - access: open - num_parameters: 40000000000 - release_date: 2023-05-25 - - # Salesforce - - name: together/codegen - display_name: CodeGen (16B) - description: CodeGen (16B parameters) is an open dense code model trained for multi-turn program synthesis ([blog](https://arxiv.org/pdf/2203.13474.pdf)). 
- creator_organization: Tsinghua - access: open - num_parameters: 16000000000 - release_date: 2022-03-25 - todo: true - - # Tsinghua - - name: together/glm - display_name: GLM (130B) - description: GLM (130B parameters) is an open bilingual (English & Chinese) bidirectional dense model that was trained using General Language Model (GLM) procedure ([paper](https://arxiv.org/pdf/2210.02414.pdf)). - creator_organization: Tsinghua - access: open - num_parameters: 130000000000 - release_date: 2022-08-04 - - - name: together/codegeex - display_name: CodeGeeX (13B) - description: CodeGeeX (13B parameters) is an open dense code model trained on more than 20 programming languages on a corpus of more than 850B tokens ([blog](http://keg.cs.tsinghua.edu.cn/codegeex/)). - creator_organization: Tsinghua - access: open - num_parameters: 13000000000 - release_date: 2022-09-19 - todo: true - - # Writer - - name: writer/palmyra-base - display_name: Palmyra Base (5B) - description: Palmyra Base (5B) - creator_organization: Writer - access: limited - num_parameters: 5000000000 - release_date: 2022-10-13 - - name: writer/palmyra-large - display_name: Palmyra Large (20B) - description: Palmyra Large (20B) - creator_organization: Writer - access: limited - num_parameters: 20000000000 - release_date: 2022-12-23 - - name: writer/palmyra-instruct-30 - display_name: InstructPalmyra (30B) - description: InstructPalmyra (30B parameters) is trained using reinforcement learning techniques based on feedback from humans. 
- creator_organization: Writer - access: limited - num_parameters: 30000000000 - release_date: 2023-02-16 - - name: writer/palmyra-e - display_name: Palmyra E (30B) - description: Palmyra E (30B) - creator_organization: Writer - access: limited - num_parameters: 30000000000 - release_date: 2023-03-03 - - name: writer/silk-road - display_name: Silk Road (35B) - description: Silk Road (35B) - creator_organization: Writer - access: limited - num_parameters: 35000000000 - release_date: 2023-04-13 - - name: writer/palmyra-x - display_name: Palmyra X (43B) - description: Palmyra-X (43B parameters) is trained to adhere to instructions using human feedback and utilizes a technique called multiquery attention. Furthermore, a new feature called 'self-instruct' has been introduced, which includes the implementation of an early stopping criteria specifically designed for minimal instruction tuning ([paper](https://dev.writer.com/docs/becoming-self-instruct-introducing-early-stopping-criteria-for-minimal-instruct-tuning)). - creator_organization: Writer - access: limited - num_parameters: 43000000000 - release_date: 2023-06-11 - - name: writer/palmyra-x-v2 - display_name: Palmyra X V2 (33B) - description: Palmyra-X V2 (33B parameters) is a Transformer-based model, which is trained on extremely large-scale pre-training data. The pre-training data more than 2 trillion tokens types are diverse and cover a wide range of areas, used FlashAttention-2. - creator_organization: Writer - access: limited - num_parameters: 33000000000 - release_date: 2023-12-01 - - name: writer/palmyra-x-v3 - display_name: Palmyra X V3 (72B) - description: Palmyra-X V3 (72B parameters) is a Transformer-based model, which is trained on extremely large-scale pre-training data. It is trained via unsupervised learning and DPO and use multiquery attention. 
- creator_organization: Writer - access: limited - num_parameters: 72000000000 - release_date: 2023-12-01 - - name: writer/palmyra-x-32k - display_name: Palmyra X-32K (33B) - description: Palmyra-X-32K (33B parameters) is a Transformer-based model, which is trained on large-scale pre-training data. The pre-training data types are diverse and cover a wide range of areas. These data types are used in conjunction and the alignment mechanism to extend context window. - creator_organization: Writer - access: limited - num_parameters: 33000000000 - release_date: 2023-12-01 - - # Yandex - - name: together/yalm - display_name: YaLM (100B) - description: YaLM (100B parameters) is an autoregressive language model trained on English and Russian text ([GitHub](https://github.com/yandex/YaLM-100B)). - creator_organization: Yandex - access: open - num_parameters: 100000000000 - release_date: 2022-06-23 - - # NVIDIA - - name: nvidia/megatron-gpt2 - display_name: Megatron GPT2 - description: GPT-2 implemented in Megatron-LM ([paper](https://arxiv.org/abs/1909.08053)). - creator_organization: NVIDIA - access: open - todo: true - ############################################################ adapter: - name: method diff --git a/src/helm/benchmark/static/schema_lite.yaml b/src/helm/benchmark/static/schema_lite.yaml index ec6abd6dab3..47f769abc3c 100644 --- a/src/helm/benchmark/static/schema_lite.yaml +++ b/src/helm/benchmark/static/schema_lite.yaml @@ -1,229 +1,4 @@ --- -############################################################ -models: - # Anthropic - - name: anthropic/claude-2.0 - display_name: Anthropic Claude 2.0 - description: Claude 2.0 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). 
([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf)) - creator_organization: Anthropic - access: limited - release_date: 2023-07-11 - - name: anthropic/claude-2.1 - display_name: Anthropic Claude 2.1 - description: Claude 2.1 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf)) - creator_organization: Anthropic - access: limited - release_date: 2023-11-21 - - name: anthropic/claude-v1.3 - display_name: Anthropic Claude v1.3 - description: A model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)). - creator_organization: Anthropic - access: limited - release_date: 2023-03-17 - - name: anthropic/claude-instant-1.2 - display_name: Anthropic Claude Instant 1.2 - description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)). - creator_organization: Anthropic - access: limited - release_date: 2023-08-09 - - # Cohere - - name: cohere/command - display_name: Cohere Command - description: Command is Cohere’s flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications. [docs](https://docs.cohere.com/reference/generate) and [changelog](https://docs.cohere.com/changelog) - creator_organization: Cohere - access: limited - release_date: 2023-09-29 - - name: cohere/command-light - display_name: Cohere Command Light - description: Command is Cohere’s flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications. 
[docs](https://docs.cohere.com/reference/generate) and [changelog](https://docs.cohere.com/changelog) - creator_organization: Cohere - access: limited - release_date: 2023-09-29 - - # Meta - - name: meta/llama-65b - display_name: LLaMA (65B) - description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters. - creator_organization: Meta - access: open - num_parameters: 65000000000 - release_date: 2023-02-24 - - name: meta/llama-2-7b - display_name: Llama 2 (7B) - description: Llama 2 pretrained models are trained on 2 trillion tokens, and have double the context length than Llama 1. - creator_organization: Meta - access: open - num_parameters: 7000000000 - release_date: 2023-07-18 - - name: meta/llama-2-13b - display_name: Llama 2 (13B) - description: Llama 2 pretrained models are trained on 2 trillion tokens, and have double the context length than Llama 1. - creator_organization: Meta - access: open - num_parameters: 13000000000 - release_date: 2023-07-18 - - name: meta/llama-2-70b - display_name: Llama 2 (70B) - description: Llama 2 pretrained models are trained on 2 trillion tokens, and have double the context length than Llama 1. - creator_organization: Meta - access: open - num_parameters: 70000000000 - release_date: 2023-07-18 - - # 01.AI - - name: 01-ai/yi-6b - display_name: Yi (6B) - description: The Yi models are large language models trained from scratch by developers at 01.AI. - creator_organization: 01.AI - access: open - num_parameters: 6000000000 - release_date: 2023-11-02 - - name: 01-ai/yi-34b - display_name: Yi (34B) - description: The Yi models are large language models trained from scratch by developers at 01.AI. 
- creator_organization: 01.AI - access: open - num_parameters: 34000000000 - release_date: 2023-11-02 - - # Mistral AI - - name: mistralai/mistral-7b-v0.1 - display_name: Mistral v0.1 (7B) - description: Mistral 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA). - creator_organization: Mistral AI - access: open - num_parameters: 7300000000 - release_date: 2023-09-27 - - - name: mistralai/mixtral-8x7b-32kseqlen - display_name: Mixtral (8x7B 32K seqlen) - description: Mistral AI's mixture-of-experts model ([tweet](https://twitter.com/MistralAI/status/1733150512395038967)). - creator_organization: Mistral AI - access: open - num_parameters: 56000000000 - release_date: 2023-12-08 - - # OpenAI - - name: openai/text-davinci-003 - display_name: GPT-3.5 (text-davinci-003) - description: text-davinci-003 model that involves reinforcement learning (PPO) with reward models. Derived from text-davinci-002 ([docs](https://beta.openai.com/docs/model-index-for-researchers)). - creator_organization: OpenAI - access: limited - num_parameters: 175000000000 - release_date: 2022-11-28 - - name: openai/text-davinci-002 - display_name: GPT-3.5 (text-davinci-002) - description: text-davinci-002 model that involves supervised fine-tuning on human-written demonstrations. Derived from code-davinci-002 ([docs](https://beta.openai.com/docs/model-index-for-researchers)). - creator_organization: OpenAI - access: limited - num_parameters: 175000000000 - release_date: 2022-01-27 - - name: openai/gpt-4-0613 - display_name: GPT-4 (0613) - description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 from 2023-06-13. 
- creator_organization: OpenAI - access: limited - release_date: 2023-06-13 - - name: openai/gpt-4-1106-preview - display_name: GPT-4 Turbo (1106 preview) - description: GPT-4 Turbo (preview) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Preview snapshot from November 6, 2023. - creator_organization: OpenAI - access: limited - release_date: 2023-11-06 - - name: openai/gpt-3.5-turbo-0613 - display_name: GPT-3.5 Turbo (0613) - description: Sibling model of text-davinci-003 is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13. - creator_organization: OpenAI - access: limited - release_date: 2023-06-13 - - # Writer - - name: writer/palmyra-x-v2 - display_name: Palmyra X V2 (33B) - description: Palmyra-X V2 (33B parameters) is a Transformer-based model, which is trained on extremely large-scale pre-training data. The pre-training data more than 2 trillion tokens types are diverse and cover a wide range of areas, used FlashAttention-2. - creator_organization: Writer - access: limited - num_parameters: 33000000000 - release_date: 2023-12-01 - - name: writer/palmyra-x-v3 - display_name: Palmyra X V3 (72B) - description: Palmyra-X V3 (72B parameters) is a Transformer-based model, which is trained on extremely large-scale pre-training data. It is trained via unsupervised learning and DPO and use multiquery attention. - creator_organization: Writer - access: limited - num_parameters: 72000000000 - release_date: 2023-12-01 - - # Google - - name: google/text-bison@001 - display_name: PaLM-2 (Bison) - description: The best value PaLM model. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. 
([report](https://arxiv.org/pdf/2305.10403.pdf)) - creator_organization: Google - access: limited - release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions - - name: google/text-unicorn@001 - display_name: PaLM-2 (Unicorn) - description: The largest model in PaLM family. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf)) - creator_organization: Google - access: limited - release_date: 2023-11-30 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions - - # TII UAE - - name: tiiuae/falcon-7b - display_name: Falcon (7B) - description: Falcon-7B is a 7B parameters causal decoder-only model built by TII and trained on 1,500B tokens of RefinedWeb enhanced with curated corpora. - creator_organization: TII UAE - access: open - num_parameters: 7000000000 - release_date: 2023-03-15 - - name: tiiuae/falcon-40b - display_name: Falcon (40B) - description: Falcon-40B is a 40B parameters causal decoder-only model built by TII and trained on 1,500B tokens of RefinedWeb enhanced with curated corpora. 
- creator_organization: TII UAE - access: open - num_parameters: 40000000000 - release_date: 2023-05-25 - - # AI21 Labs - - name: ai21/j2-jumbo - display_name: Jurassic-2 Jumbo (178B) - description: Jurassic-2 Jumbo (178B parameters) ([docs](https://www.ai21.com/blog/introducing-j2)) - creator_organization: AI21 Labs - access: limited - num_parameters: 178000000000 - release_date: 2023-03-09 - - name: ai21/j2-grande - display_name: Jurassic-2 Grande (17B) - description: Jurassic-2 Grande (17B parameters) ([docs](https://www.ai21.com/blog/introducing-j2)) - creator_organization: AI21 Labs - access: limited - num_parameters: 17000000000 - release_date: 2023-03-09 - - # Aleph Alpha - - name: AlephAlpha/luminous-base - display_name: Luminous Base (13B) - description: Luminous Base (13B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/)) - creator_organization: Aleph Alpha - access: limited - num_parameters: 13000000000 - # TODO: get exact release date - release_date: 2022-01-01 - - name: AlephAlpha/luminous-extended - display_name: Luminous Extended (30B) - description: Luminous Extended (30B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/)) - creator_organization: Aleph Alpha - access: limited - num_parameters: 30000000000 - release_date: 2022-01-01 - - name: AlephAlpha/luminous-supreme - display_name: Luminous Supreme (70B) - description: Luminous Supreme (70B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/)) - creator_organization: Aleph Alpha - access: limited - num_parameters: 70000000000 - release_date: 2022-01-01 - ############################################################ adapter: - name: method