package.json
{
"name": "llm-rgb",
"version": "1.0.0",
"description": "This is a set of fine grained test cases(prompts) to benchmark the reasoning and generation of LLMs in complex scenarios. Please note that this benchmark does NOT mean to be a comprehensive test against LLMs. This project is originated from an internal project of babel.cloud, the purpose of the project is to test LLMs' performance in context understanding and instruction compliance.",
"main": "index.js",
"scripts": {
"eval": "promptfoo eval --no-cache --repeat ${npm_config_repeat:-1} --max-concurrency ${npm_config_concurrency:-8} -o ~/.promptfoo/output/latest.json",
"render": "ts-node utils/generateEvalScore.ts && ts-node utils/render.ts",
"start": "npm run eval;npm run render",
"test": "vitest run"
},
"repository": {
"type": "git",
"url": "git+https://github.com/babelcloud/LLM-RGB.git"
},
"keywords": [
"LLM",
"benchmark",
"prompt",
"prompt-engineering",
"prompt-testing"
],
"author": "Hailong Zhang, Ziying Wang",
"license": "MIT",
"bugs": {
"url": "https://github.com/babelcloud/LLM-RGB/issues"
},
"homepage": "https://github.com/babelcloud/LLM-RGB#readme",
"dependencies": {
"chalk": "^4.1.2",
"cli-table3": "^0.6.3",
"js-yaml": "^4.1.0",
"promptfoo": "~0.92.2",
"python-shell": "^5.0.0",
"ts-node": "^10.9.1",
"typescript": "^5.6.2"
},
"devDependencies": {
"@types/node": "^20.8.7",
"vitest": "^2.1.4"
}
}
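
The eval script reads the npm_config_repeat and npm_config_concurrency environment variables, falling back to 1 and 8 respectively. A minimal usage sketch, assuming a POSIX shell; setting the npm_config_* variables explicitly works regardless of how your npm version maps --flag options onto script environments:

# Run the benchmark once with defaults, then render the report
npm start

# Override repeat count and concurrency via environment variables,
# which the eval script expands as ${npm_config_repeat:-1} and
# ${npm_config_concurrency:-8}
npm_config_repeat=3 npm_config_concurrency=4 npm run eval
npm run render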