Commit

leaderboard at oct 13
teowu committed Oct 13, 2023
1 parent ea88228 commit 5ae6be7
Showing 9 changed files with 176 additions and 28 deletions.
176 changes: 157 additions & 19 deletions leaderboards/IQA_outputs/eval.ipynb
@@ -38,14 +38,14 @@
]
},
{
"cell_type": "markdown",
"id": "13d12331-5d4b-422f-a376-9c321431036b",
"metadata": {
"tags": []
},
"source": [
"### Main Results"
]
},
{
"cell_type": "code",
@@ -73,11 +73,14 @@
"|mplug_owl | 0.634/0.644 | 0.409/0.427 | 0.241/0.271 | 0.437/0.487 | 0.148/0.180 | 0.687/0.711 | 0.466/0.486| 0.432/0.458|\n",
"|otter_v1 | 0.436/0.441 | 0.406/0.406 | 0.143/0.142 | -0.008/0.018 | 0.254/0.264 | 0.475/0.481 | 0.557/0.577| 0.323/0.333|\n",
"|qwen-vl | 0.676/0.669 | 0.470/0.546 | 0.298/0.338 | 0.504/0.532 | 0.273/0.284 | 0.617/0.686 | 0.486/0.486| 0.475/0.506|\n",
"|shikra | 0.327/0.337 | 0.314/0.307 | 0.222/0.227 | 0.322/0.336 | 0.198/0.201 | 0.640/0.661 | 0.324/0.332| 0.335/0.343|\n",
"|visualglm | 0.498/0.507 | 0.247/0.234 | 0.146/0.154 | 0.110/0.116 | 0.209/0.183 | 0.342/0.349 | 0.127/0.131| 0.240/0.239|\n"
]
}
],
"source": [
"## Official Results\n",
"\n",
"import json, glob\n",
"models = glob.glob(\"*/\")\n",
"\n",
@@ -120,45 +123,172 @@
" else:\n",
" s = (spearmanr([float(di[\"gt_score\"]) for di in d], [softmax(di[\"logit_high\"], di[\"logit_low\"]) for di in d])[0])\n",
" p = (pearsonr([float(di[\"gt_score\"]) for di in d], [softmax(di[\"logit_high\"], di[\"logit_low\"]) for di in d])[0])\n",
" else:\n",
" if json_ == json_prefix + \"cgi.json\":\n",
" # as in paper\n",
" d1, d2 = d[:3000], d[3000:6000]\n",
" s = (spearmanr([float(di[\"gt_score\"]) for di in d1], [softmax(di[\"logit_good\"], di[\"logit_poor\"]) for di in d1])[0])\n",
" p = (pearsonr([float(di[\"gt_score\"]) for di in d1], [softmax(di[\"logit_good\"], di[\"logit_poor\"]) for di in d1])[0])\n",
" s += (spearmanr([float(di[\"gt_score\"]) for di in d2], [softmax(di[\"logit_good\"], di[\"logit_poor\"]) for di in d2])[0])\n",
" p += (pearsonr([float(di[\"gt_score\"]) for di in d2], [softmax(di[\"logit_good\"], di[\"logit_poor\"]) for di in d2])[0])\n",
" s /= 2\n",
" p /= 2\n",
" else:\n",
" s = (spearmanr([float(di[\"gt_score\"]) for di in d], [softmax(di[\"logit_good\"], di[\"logit_poor\"]) for di in d])[0])\n",
" p = (pearsonr([float(di[\"gt_score\"]) for di in d], [softmax(di[\"logit_good\"], di[\"logit_poor\"]) for di in d])[0])\n",
" stri += \" | {:.3f}/{:.3f}\".format(s, p)\n",
" avg_s += s\n",
" avg_p += p\n",
" \n",
" print(\"|\"+stri+\"|\"+\" {:.3f}/{:.3f}|\".format(avg_s/7, avg_p/7))"
]
},
{
"cell_type": "markdown",
"id": "6da07745-2572-4559-9f05-75e991282efd",
"metadata": {},
"source": [
"#### What if we do not use the proposed softmax strategy in Q-Bench?"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c5b57fe9-30d2-461c-8d3b-4126c2e58829",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Results of nan/nan mean that argmax(logit_good, logit_poor) is always the same value, i.e. the model constantly predicts good (or poor).\n",
"| **Model Name**| SPAQ| KoNIQ-10k| LIVE-FB| LIVE-itw| CGIQA-6K| AGIQA-3K| KADID-10K| average| \n",
"| -| -| -| -| -| -| -| -| -| \n",
"|clip_vit_l14 | 0.269/0.269 | 0.383/0.427 | 0.163/0.185 | 0.246/0.226 | 0.030/0.031 | 0.167/0.191 | 0.271/0.272| 0.219/0.229|\n",
"|idefics | 0.119/0.127 | 0.040/0.050 | 0.050/0.073 | 0.029/0.028 | 0.066/0.069 | 0.254/0.302 | 0.020/0.020| 0.083/0.096|\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ps/anaconda3/lib/python3.11/site-packages/scipy/stats/_stats_py.py:4916: ConstantInputWarning: An input array is constant; the correlation coefficient is not defined.\n",
" warnings.warn(stats.ConstantInputWarning(warn_msg))\n",
"/home/ps/anaconda3/lib/python3.11/site-packages/scipy/stats/_stats_py.py:4424: ConstantInputWarning: An input array is constant; the correlation coefficient is not defined.\n",
" warnings.warn(stats.ConstantInputWarning(msg))\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"|instructblip_t5 | -0.010/-0.010 | 0.007/0.003 | 0.011/0.010 | -0.034/-0.033 | nan/nan | -0.015/-0.015 | 0.011/0.011| nan/nan|\n",
"|instructblip_vicuna | 0.663/0.664 | 0.284/0.353 | 0.156/0.250 | 0.196/0.264 | 0.214/0.222 | 0.506/0.567 | 0.305/0.307| 0.332/0.375|\n",
"|kosmos_2 | 0.533/0.535 | 0.074/0.085 | 0.084/0.095 | 0.152/0.173 | 0.065/0.066 | 0.159/0.182 | 0.186/0.186| 0.179/0.189|\n",
"|llama_adapter_v2 | 0.417/0.423 | 0.218/0.237 | 0.223/0.257 | 0.205/0.239 | 0.200/0.200 | 0.545/0.579 | 0.228/0.230| 0.291/0.309|\n",
"|llava_v1.5 | 0.481/0.484 | 0.311/0.341 | 0.244/0.270 | 0.306/0.346 | 0.228/0.227 | 0.607/0.667 | 0.251/0.253| 0.347/0.370|\n",
"|llava_v1 | 0.101/0.108 | 0.038/0.045 | 0.036/0.055 | 0.059/0.075 | 0.066/0.079 | 0.240/0.297 | 0.051/0.051| 0.084/0.101|\n",
"|minigpt4_13b | 0.009/0.010 | 0.009/0.013 | 0.019/0.019 | 0.025/0.035 | nan/nan | 0.055/0.066 | nan/nan| nan/nan|\n",
"|mplug_owl | 0.463/0.469 | 0.111/0.154 | 0.081/0.124 | 0.170/0.237 | nan/nan | 0.410/0.466 | 0.203/0.204| nan/nan|\n",
"|otter_v1 | 0.108/0.108 | 0.101/0.117 | 0.082/0.087 | -0.007/0.009 | 0.109/0.115 | 0.422/0.434 | 0.463/0.465| 0.183/0.191|\n",
"|qwen-vl | 0.128/0.127 | 0.262/0.251 | 0.223/0.216 | 0.345/0.327 | 0.231/0.236 | 0.427/0.440 | 0.387/0.389| 0.286/0.284|\n",
"|shikra | 0.277/0.281 | 0.178/0.202 | 0.152/0.170 | 0.248/0.267 | 0.093/0.100 | 0.513/0.563 | 0.245/0.246| 0.244/0.261|\n",
"|visualglm | 0.415/0.418 | 0.139/0.138 | 0.088/0.091 | 0.051/0.044 | 0.055/0.057 | 0.300/0.319 | 0.063/0.063| 0.159/0.161|\n"
]
}
],
"source": [
"## Ablation Results for Using \"argmax\" between \"good\" and \"poor\"\n",
"\n",
"print(\"Results of nan/nan mean that argmax(logit_good, logit_poor) is always the same value, i.e. the model constantly predicts good (or poor).\")\n",
"datasets = [\"\", \"**Model Name**\", \"SPAQ\",\"KoNIQ-10k\",\"LIVE-FB\",\"LIVE-itw\",\"CGIQA-6K\", \"AGIQA-3K\", \"KADID-10K\", \"average\", \"\"]\n",
"print(\"| \".join(datasets))\n",
"lst = [\"\"] + [\"-\" for i in datasets[1:-1]] + [\"\"]\n",
"print(\"| \".join(lst))\n",
"for json_prefix in sorted(models):\n",
" jsons = [\n",
" json_prefix + \"spaq.json\",\n",
" json_prefix + \"koniq.json\",\n",
" json_prefix + \"flive.json\",\n",
" json_prefix + \"livec.json\",\n",
" json_prefix + \"cgi.json\",\n",
" json_prefix + \"agi.json\",\n",
" json_prefix + \"kadid.json\",\n",
" ]\n",
" stri = json_prefix[:-1]\n",
" avg_s, avg_p = 0., 0.\n",
" for json_ in jsons:\n",
" if not glob.glob(json_):\n",
" print(json_)\n",
" continue\n",
" with open(json_) as f:\n",
" s = f.read().replace(\"}{\", \"},{\")\n",
" if s[0] != \"[\":\n",
" s = \"[\" + s + \"]\"\n",
" d = json.loads(s)\n",
" if json_prefix == \"instructblip_t5/\":\n",
" if json_ == json_prefix + \"cgi.json\":\n",
" # as in paper\n",
" d1, d2 = d[:3000], d[3000:6000]\n",
" s = (spearmanr([float(di[\"gt_score\"]) for di in d1], [argmax(di[\"logit_high\"], di[\"logit_low\"]) for di in d1])[0])\n",
" p = (pearsonr([float(di[\"gt_score\"]) for di in d1], [argmax(di[\"logit_high\"], di[\"logit_low\"]) for di in d1])[0])\n",
" s += (spearmanr([float(di[\"gt_score\"]) for di in d2], [argmax(di[\"logit_high\"], di[\"logit_low\"]) for di in d2])[0])\n",
" p += (pearsonr([float(di[\"gt_score\"]) for di in d2], [argmax(di[\"logit_high\"], di[\"logit_low\"]) for di in d2])[0])\n",
" s /= 2\n",
" p /= 2\n",
" else:\n",
" s = (spearmanr([float(di[\"gt_score\"]) for di in d], [argmax(di[\"logit_high\"], di[\"logit_low\"]) for di in d])[0])\n",
" p = (pearsonr([float(di[\"gt_score\"]) for di in d], [argmax(di[\"logit_high\"], di[\"logit_low\"]) for di in d])[0])\n",
"        elif json_prefix == \"qwen-vl/\":\n",
" if json_ == json_prefix + \"cgi.json\":\n",
" # as in paper\n",
" d1, d2 = d[:3000], d[3000:6000]\n",
"                s = (spearmanr([float(di[\"gt_score\"]) for di in d1], [argmax(di[\"logit_excellent\"], di[\"logit_poor\"]) for di in d1])[0])\n",
"                p = (pearsonr([float(di[\"gt_score\"]) for di in d1], [argmax(di[\"logit_excellent\"], di[\"logit_poor\"]) for di in d1])[0])\n",
"                s += (spearmanr([float(di[\"gt_score\"]) for di in d2], [argmax(di[\"logit_excellent\"], di[\"logit_poor\"]) for di in d2])[0])\n",
"                p += (pearsonr([float(di[\"gt_score\"]) for di in d2], [argmax(di[\"logit_excellent\"], di[\"logit_poor\"]) for di in d2])[0])\n",
" s /= 2\n",
" p /= 2\n",
" else:\n",
"                s = (spearmanr([float(di[\"gt_score\"]) for di in d], [argmax(di[\"logit_excellent\"], di[\"logit_poor\"]) for di in d])[0])\n",
"                p = (pearsonr([float(di[\"gt_score\"]) for di in d], [argmax(di[\"logit_excellent\"], di[\"logit_poor\"]) for di in d])[0])\n",
" \n",
" else:\n",
" if json_ == json_prefix + \"cgi.json\":\n",
" # as in paper\n",
" d1, d2 = d[:3000], d[3000:6000]\n",
"                s = (spearmanr([float(di[\"gt_score\"]) for di in d1], [argmax(di[\"logit_good\"], di[\"logit_poor\"]) for di in d1])[0])\n",
"                p = (pearsonr([float(di[\"gt_score\"]) for di in d1], [argmax(di[\"logit_good\"], di[\"logit_poor\"]) for di in d1])[0])\n",
"                s += (spearmanr([float(di[\"gt_score\"]) for di in d2], [argmax(di[\"logit_good\"], di[\"logit_poor\"]) for di in d2])[0])\n",
"                p += (pearsonr([float(di[\"gt_score\"]) for di in d2], [argmax(di[\"logit_good\"], di[\"logit_poor\"]) for di in d2])[0])\n",
" s /= 2\n",
" p /= 2\n",
" else:\n",
"                s = (spearmanr([float(di[\"gt_score\"]) for di in d], [argmax(di[\"logit_good\"], di[\"logit_poor\"]) for di in d])[0])\n",
"                p = (pearsonr([float(di[\"gt_score\"]) for di in d], [argmax(di[\"logit_good\"], di[\"logit_poor\"]) for di in d])[0])\n",
" stri += \" | {:.3f}/{:.3f}\".format(s, p)\n",
" avg_s += s\n",
" avg_p += p\n",
" \n",
" print(\"|\"+stri+\"|\"+\" {:.3f}/{:.3f}|\".format(avg_s/7, avg_p/7))"
]
},
{
"cell_type": "markdown",
"id": "aa22635f-acb4-4e58-b532-f45395ce428a",
"metadata": {
"tags": []
},
"source": [
"### What if we do not follow the LLMs' preferred output?"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3d228f01-eda9-4f47-8506-bf227ebb142f",
"metadata": {
"tags": []
@@ -235,6 +365,14 @@
" print(spearmanr([float(di[\"gt_score\"]) for di in d], [softmax(di[\"logit_high\"], di[\"logit_low\"]) for di in d])[0])\n",
" print(pearsonr([float(di[\"gt_score\"]) for di in d], [softmax(di[\"logit_high\"], di[\"logit_low\"]) for di in d])[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa5bac51-c353-4bab-848b-e786a57a92a6",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
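The two scoring strategies compared in the notebook above — a softmax over the two option logits versus a hard argmax — can be sketched in isolation. This is a minimal illustration with toy logits, not the repository's code; it also shows why a constant argmax yields nan correlations:

```python
import numpy as np
from scipy.stats import spearmanr

def softmax_score(logit_good, logit_poor):
    # Binary softmax reduces to a sigmoid of the logit difference:
    # the probability mass assigned to "good".
    return 1.0 / (1.0 + np.exp(logit_poor - logit_good))

def argmax_score(logit_good, logit_poor):
    # Hard decision used in the ablation: 1 if "good" wins, else 0.
    return float(logit_good > logit_poor)

# Toy logits: the model always favours "good", with growing margins.
gt = [1.0, 2.0, 3.0, 4.0]
logits = [(2.0, 1.5), (2.5, 1.0), (3.0, 0.5), (4.0, 0.0)]

soft = [softmax_score(g, p) for g, p in logits]
hard = [argmax_score(g, p) for g, p in logits]

print(spearmanr(gt, soft)[0])  # perfectly monotone scores -> 1.0
print(spearmanr(gt, hard)[0])  # constant predictions -> nan (ConstantInputWarning)
```

The same collapse to a constant prediction explains the nan/nan rows (e.g. for `instructblip_t5`) in the argmax ablation output.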
1 change: 1 addition & 0 deletions leaderboards/IQA_outputs/visualglm/agi.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions leaderboards/IQA_outputs/visualglm/cgi.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions leaderboards/IQA_outputs/visualglm/flive.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions leaderboards/IQA_outputs/visualglm/kadid.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions leaderboards/IQA_outputs/visualglm/koniq.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions leaderboards/IQA_outputs/visualglm/livec.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions leaderboards/IQA_outputs/visualglm/spaq.json

Large diffs are not rendered by default.

21 changes: 12 additions & 9 deletions leaderboards/README.md
@@ -1,4 +1,4 @@
# Hot Leaderboards @ Oct 14

<div align="center">

@@ -7,18 +7,18 @@ _Join the competition for low-level vision now!_
</div>

<div>
_version_: v0.1.1013wip; _Timeliness_: Updated on 13th Oct.
</div>

</div>

## Leaderboards for (A1): Perception

*New! Result of LLaVA-v1.5/Qwen-VL-Chat is out!*

For the partition of the `dev` and `test` subsets, please see [our dataset release notes](../data_release/). As some models excel under the original testing pipeline while others perform better under PPL-based testing, we maintain two leaderboards for the two testing methods. See [examples](../example_code_for_idefics) for their different settings.

### Original Testing Pipeline
- 13 models tested
- via Multi-Choice Questions
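A scorer for this multi-choice setting can be sketched roughly as follows. The `parse_choice` logic and the sample replies are purely illustrative assumptions; the benchmark's actual answer matching may differ:

```python
import re

def parse_choice(response, options=("A", "B", "C", "D")):
    # Take the first standalone option letter in the model's reply;
    # return None when the reply names no option at all.
    m = re.search(r"\b([A-D])\b", response)
    return m.group(1) if m and m.group(1) in options else None

def accuracy(responses, answers):
    # Fraction of questions where the parsed choice matches the key.
    hits = sum(parse_choice(r) == a for r, a in zip(responses, answers))
    return hits / len(answers)

responses = ["A. The image is blurry.", "The answer is C.", "B", "I am not sure."]
answers = ["A", "C", "D", "B"]
print(accuracy(responses, answers))  # 2 of 4 correct -> 0.5
```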

#### Accuracies on Open-set (`dev`)
@@ -37,7 +37,7 @@ About the partition of `dev` and `test` subsets, please see [our dataset release
| otter_v1 | 0.5709 | 0.4071 | 0.3955 | 0.4222 | 0.4931 | 0.4408 | 0.5265 | 0.4635 |
| qwen_vl | 0.6309 | 0.5819 | 0.5639 | 0.5058 | 0.6273 | 0.5789 | 0.7388 | 0.5940 |
| shikra | 0.6564 | 0.4735 | 0.4909 | 0.4883 | 0.5949 | 0.5000 | 0.6408 | 0.5465 |
| visualglm | 0.6018 | 0.5420 | 0.4625 | 0.5175 | 0.5440 | 0.5362 | 0.5714 | 0.5378 |



@@ -57,11 +57,14 @@ About the partition of `dev` and `test` subsets, please see [our dataset release
| otter_v1 | 0.5766 | 0.3970 | 0.4259 | 0.4212 | 0.4893 | 0.4760 | 0.5417 | 0.4722 |
| qwen_vl | 0.6533 | 0.6074 | 0.5844 | 0.5413 | 0.6635 | 0.5822 | 0.7300 | 0.6167 |
| shikra | 0.6909 | 0.4793 | 0.4671 | 0.4731 | 0.6086 | 0.5308 | 0.6477 | 0.5532 |
| visualglm | 0.6131 | 0.5358 | 0.4403 | 0.4856 | 0.5489 | 0.5548 | 0.5779 | 0.5331 |


### (*Additional*) PPL-based Testing Pipeline

- 11 models tested
- via Losses of Different Answers
- *non-finalized work-in-progress version, may update*

*No options are provided in prompts!*

@@ -99,7 +102,6 @@ shikra | 0.6515 | 0.4729 | 0.5021 | 0.4269 | 0.6205 | 0.5034 | 0.7197 | 0.5478 |

## Leaderboards for (A2): Description

*New! Result of LLaVA-v1.5/Qwen-VL-Chat is out!*

Abbreviations for dimensions: *comp: completeness, prec: precision, rele: relevance*

@@ -117,15 +119,15 @@ Abbreviations for dimensions: *comp: completeness, prec: precision, rele: releva
| otter_v1 | 22.38% | 59.36% | 18.25% | 0.96/2.00 | 40.68% | 35.99% | 23.33% | 0.83/2.00 | 1.95% | 13.20% | 84.85% | 1.83/2.00 | 3.61/6.00 |
| qwen_vl | 26.34% | 49.13% | 24.53% | 0.98/2.00 | 50.62% | 23.44% | 25.94% | 0.75/2.00 | 0.73% | 35.56% | 63.72% | 1.63/2.00 | 3.36/6.00 |
| shikra | 21.14% | 68.33% | 10.52% | 0.89/2.00 | 30.33% | 28.30% | 41.37% | 1.11/2.00 | 1.14% | 64.36% | 34.50% | 1.33/2.00 | 3.34/6.00 |
| visualglm | 30.75% | 56.64% | 12.61% | 0.82/2.00 | 38.64% | 26.18% | 35.18% | 0.97/2.00 | 6.14% | 67.15% | 26.71% | 1.21/2.00 | 2.99/6.00 |


## Leaderboards for (A3): Assessment

*New! Result of LLaVA-v1.5/QWen-VL-Chat is out!*

The datasets can be found [here](../a3_iqa_databases/).

See [IQA_outputs/eval.ipynb](IQA_outputs/eval.ipynb) for our ablation experiments.


| **Model Name**| SPAQ| KoNIQ-10k| LIVE-FB| LIVE-itw| CGIQA-6K| AGIQA-3K| KADID-10K| average|
| -| -| -| -| -| -| -| -| -|
@@ -142,6 +144,7 @@ The datasets can be found [here](../a3_iqa_databases/).
|otter_v1 | 0.436/0.441 | 0.406/0.406 | 0.143/0.142 | -0.008/0.018 | 0.254/0.264 | 0.475/0.481 | 0.557/0.577| 0.323/0.333|
|qwen-vl | 0.676/0.669 | **0.470/0.546** (rank 1) | 0.298/0.338 | **0.504/0.532** (rank 1) | 0.273/0.284 | 0.617/0.686 | **0.486/0.486** (rank 1) | **0.475/0.506** (rank 1) |
|shikra | 0.327/0.337 | 0.314/0.307 | 0.222/0.227 | 0.322/0.336 | 0.198/0.201 | 0.640/0.661 | 0.324/0.332| 0.335/0.343|
|visualglm | 0.498/0.507 | 0.247/0.234 | 0.146/0.154 | 0.110/0.116 | 0.209/0.183 | 0.342/0.349 | 0.127/0.131| 0.240/0.239|

Overall, `qwen-vl` has the best IQA performance among the models (as of 12th Oct); meanwhile, `llava-v1.5` (ranked 2nd overall) leads on AIGC/CGI images.

