diff --git a/mindnlp/peft_lora_mindnlp.ipynb b/mindnlp/peft_lora_mindnlp.ipynb deleted file mode 100644 index 51f133c95..000000000 --- a/mindnlp/peft_lora_mindnlp.ipynb +++ /dev/null @@ -1,2572 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "48608ac7-71cd-4859-9d27-aac6b162d2b0", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.9/site-packages/numpy/core/getlimits.py:499: UserWarning: The value of the smallest subnormal for type is zero.\n", - " setattr(self, word, getattr(machar, word).flat[0])\n", - "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.9/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", - " return self._float_to_str(self.smallest_subnormal)\n", - "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.9/site-packages/numpy/core/getlimits.py:499: UserWarning: The value of the smallest subnormal for type is zero.\n", - " setattr(self, word, getattr(machar, word).flat[0])\n", - "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.9/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", - " return self._float_to_str(self.smallest_subnormal)\n", - "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "Building prefix dict from the default dictionary ...\n", - "Loading model from cache /tmp/jieba.cache\n", - "Loading model cost 1.289 seconds.\n", - "Prefix dict has been built successfully.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "dataset column: ['image', 'label']\n", - "dataset size: 5000\n", - "dataset batch size: 1\n" - ] - } - ], - "source": [ - "import mindspore\n", - "import mindnlp\n", - "import numpy as np\n", - "from mindspore import context, Tensor\n", - "from mindnlp.dataset import load_dataset\n", - "dataset = load_dataset(\"food101\", split=\"train[:5000]\")\n", - "\n", - "def show_dataset_info(dataset):\n", - " print(\"dataset column: {}\".format(dataset.get_col_names()))\n", - " print(\"dataset size: {}\".format(dataset.get_dataset_size()))\n", - " print(\"dataset batch size: {}\".format(dataset.get_batch_size()))\n", - "show_dataset_info(dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "e3f69b69-c1f4-4c60-b07a-d4a63784e711", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "dataset column: ['image', 'label']\n", - "dataset size: 4500\n", - "dataset batch size: 1\n", - "dataset column: ['image', 'label']\n", - "dataset size: 500\n", - "dataset batch size: 1\n" - ] - } - ], - "source": [ - "train_ds, val_ds = dataset.split([0.9, 0.1])\n", - "show_dataset_info(train_ds)\n", - "show_dataset_info(val_ds)\n", - "\n", - "from mindnlp.transformers import ViTImageProcessor\n", - "image_processor = ViTImageProcessor.from_pretrained(\"google/vit-base-patch16-224-in21k\")\n", - "\n", - "def transform(image, label):\n", - " # Process the image with the image processor\n", - " processed_output = image_processor(image, return_tensors='np')\n", - "\n", - " # Get 'pixel_values' and drop the extra batch dimension\n", - " pixel_values = processed_output['pixel_values']\n", - " if len(pixel_values.shape) == 4 and pixel_values.shape[0] == 1:\n",
- " pixel_values = np.squeeze(pixel_values, axis=0) # remove the leading batch dimension, shape becomes (3, 224, 224)\n", - " \n", - " labels = np.array([label], dtype=np.int32)\n", - " return pixel_values, labels" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "5af33ab7-520d-4911-a36e-20bd56da693f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "dataset column: ['pixel_values', 'labels']\n", - "dataset size: 281\n", - "dataset batch size: 16\n", - "dataset column: ['pixel_values', 'labels']\n", - "dataset size: 31\n", - "dataset batch size: 16\n" - ] - } - ], - "source": [ - "# Process the training set\n", - "train_ds = train_ds.map(operations=transform, input_columns=[\"image\", \"label\"], output_columns=[\"pixel_values\", \"labels\"])\n", - "train_ds = train_ds.batch(batch_size=16, drop_remainder=True)\n", - "\n", - "# Process the validation set\n", - "val_ds = val_ds.map(operations=transform, input_columns=[\"image\", \"label\"], output_columns=[\"pixel_values\", \"labels\"])\n", - "val_ds = val_ds.batch(batch_size=16, drop_remainder=True)\n", - "\n", - "show_dataset_info(train_ds)\n", - "show_dataset_info(val_ds)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "760a10e1-567c-488f-b77a-a1a5b5d47394", - "metadata": {}, - "outputs": [], - "source": [ - "# Define the list of Food101 class names (all 101 classes must be listed)\n", - "class_names = [\n", - " 'apple_pie', 'baby_back_ribs', 'baklava', 'beef_carpaccio', 'beef_tartare',\n", - " 'beet_salad', 'beignets', 'bibimbap', 'bread_pudding', 'breakfast_burrito',\n", - " 'bruschetta', 'caesar_salad', 'cannoli', 'caprese_salad', 'carrot_cake',\n", - " 'ceviche', 'cheesecake', 'cheese_plate', 'chicken_curry', 'chicken_quesadilla',\n", - " 'chicken_wings', 'chocolate_cake', 'chocolate_mousse', 'churros', 'clam_chowder',\n", - " 'club_sandwich', 'crab_cakes', 'creme_brulee', 'croque_madame', 'cup_cakes',\n", - " 'deviled_eggs', 'donuts', 'dumplings', 'edamame', 'eggs_benedict',\n", - " 'escargots', 'falafel', 'filet_mignon', 'fish_and_chips', 'foie_gras',\n", - " 'french_fries', 'french_onion_soup', 'french_toast', 'fried_calamari', 'fried_rice',\n", - " 'frozen_yogurt', 'garlic_bread', 'gnocchi', 'greek_salad', 'grilled_cheese_sandwich',\n", - " 'grilled_salmon', 'guacamole', 'gyoza', 'hamburger', 'hot_and_sour_soup',\n", - " 'hot_dog', 'huevos_rancheros', 'hummus', 'ice_cream', 'lasagna',\n", - " 'lobster_bisque', 'lobster_roll_sandwich', 'macaroni_and_cheese', 'macarons', 'miso_soup',\n", - " 'mussels', 'nachos', 'omelette', 'onion_rings', 'oysters',\n", - " 'pad_thai', 'paella', 'pancakes', 'panna_cotta', 'peking_duck',\n", - " 'pho', 'pizza', 'pork_chop', 'poutine', 'prime_rib',\n", - " 'pulled_pork_sandwich', 'ramen', 'ravioli', 'red_velvet_cake', 'risotto',\n", - " 'samosa', 'sashimi', 'scallops', 'seaweed_salad', 'shrimp_and_grits',\n", - " 'spaghetti_bolognese', 'spaghetti_carbonara', 'spring_rolls', 'steak', 'strawberry_shortcake',\n", - " 'sushi', 'tacos', 'takoyaki', 'tiramisu', 'tuna_tartare',\n", - " 'waffles'\n", - "]\n", - "\n", - "# Create the label2id and id2label dictionaries\n", - "label2id = {name: idx for idx, name in enumerate(class_names)}\n", - "id2label = {idx: name for idx, name in enumerate(class_names)}" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "381475e6-7060-4c75-8ff9-8ea09d4a8c42", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[MS_ALLOC_CONF]Runtime config: enable_vmm:True vmm_align_size:2MB\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Some
weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "trainable params: 85876325 || all params: 85876325 || trainable%: 100.00\n" - ] - } - ], - "source": [ - "def print_trainable_parameters(model):\n", - " \"\"\"\n", - " Prints the number of trainable parameters in the model.\n", - " \"\"\"\n", - " trainable_params = 0\n", - " all_param = 0\n", - " for _, param in model.named_parameters():\n", - " all_param += param.numel()\n", - " if param.requires_grad:\n", - " trainable_params += param.numel()\n", - " print(\n", - " f\"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param:.2f}\"\n", - " )\n", - "from mindnlp.transformers import ViTForImageClassification\n", - "model = ViTForImageClassification.from_pretrained(\n", - " \"google/vit-base-patch16-224-in21k\",\n", - " num_labels=101,\n", - " label2id=label2id,\n", - " id2label=id2label,\n", - " ignore_mismatched_sizes=True, # provide this in case you're planning to fine-tune an already fine-tuned checkpoint\n", - ")\n", - "print_trainable_parameters(model)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "68fed2fb-0060-434a-9e51-316b232ae52f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "trainable params: 667493 || all params: 86543818 || trainable%: 0.77\n" - ] - } - ], - "source": [ - "from mindnlp.peft import LoraConfig, get_peft_model\n", - "config = LoraConfig(\n", - " r=16,\n", - " lora_alpha=16,\n", - " target_modules=[\"query\", \"value\"],\n", - " lora_dropout=0.1,\n", - " bias=\"none\",\n", - " modules_to_save=[\"classifier\"],\n", - ")\n", - "lora_model = get_peft_model(model, config)\n", - "print_trainable_parameters(lora_model)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "f2dd558d-d4ca-4ebf-ba9c-6342766d2a4e", - "metadata": {}, - "outputs": [], - "source": [ - "from mindnlp.engine import Trainer, TrainingArguments\n", - "\n", - "training_args = TrainingArguments(\n", - " output_dir=\"./vit-base-food101\",\n", - " per_device_train_batch_size=128,\n", - " evaluation_strategy=\"epoch\",\n", - " num_train_epochs=5,\n", - " fp16=True,\n", - " save_steps=100,\n", - " eval_steps=100,\n", - " logging_steps=10,\n", - " learning_rate=5e-3,\n", - " save_total_limit=2,\n", - " remove_unused_columns=False,\n", - " load_best_model_at_end=False,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "70c986d1-fb52-464f-a1ed-5497dd94f8b9", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import evaluate\n", - "\n", - "metric = evaluate.load(\"accuracy\")\n", - "# the compute_metrics function takes a Named Tuple as input:\n", - "# predictions, which are the logits of the model as Numpy arrays,\n", - "# and label_ids, which are the ground-truth labels as Numpy arrays.\n", - "def compute_metrics(eval_pred):\n", - " \"\"\"Computes accuracy on a batch of predictions\"\"\"\n", - " predictions = np.argmax(eval_pred.predictions, axis=1)\n", - " return metric.compute(predictions=predictions, references=eval_pred.label_ids)\n", - "\n", - "trainer = Trainer(\n", - " model=lora_model,\n", - " args=training_args,\n", - " 
compute_metrics=compute_metrics,\n", - " train_dataset=train_ds,\n", - " eval_dataset=val_ds,\n", - " tokenizer=image_processor,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "fb0114ce-dabc-4704-bf88-ad9b9971f166", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 1%| | 10/1405 [00:08<07:54, 2.94it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 2.4978, 'learning_rate': 0.004964412811387901, 'epoch': 0.04}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 1%|▏ | 20/1405 [00:10<05:57, 3.88it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.4403, 'learning_rate': 0.004928825622775801, 'epoch': 0.07}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 2%|▏ | 30/1405 [00:13<05:59, 3.82it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.3283, 'learning_rate': 0.004893238434163701, 'epoch': 0.11}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 3%|▎ | 40/1405 [00:16<06:02, 3.77it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.2904, 'learning_rate': 0.004857651245551602, 'epoch': 0.14}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 4%|▎ | 50/1405 [00:18<05:58, 3.78it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.3089, 'learning_rate': 0.004822064056939502, 'epoch': 0.18}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 4%|▍ | 60/1405 [00:21<05:57, 3.76it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.4635, 'learning_rate': 0.004786476868327403, 'epoch': 0.21}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 5%|▍ | 70/1405 [00:24<05:54, 3.76it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.3133, 'learning_rate': 0.004750889679715303, 'epoch': 0.25}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 6%|▌ | 80/1405 [00:26<05:57, 3.70it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.5148, 'learning_rate': 0.004715302491103203, 'epoch': 0.28}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 6%|▋ | 90/1405 [00:29<05:18, 4.13it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.3734, 'learning_rate': 0.0046797153024911034, 'epoch': 0.32}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 7%|▋ | 100/1405 [00:31<05:15, 4.13it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.3448, 'learning_rate': 0.004644128113879003, 'epoch': 0.36}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 8%|▊ | 110/1405 [00:35<05:30, 3.92it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.4002, 'learning_rate': 0.004608540925266904, 'epoch': 0.39}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 9%|▊ | 120/1405 [00:37<05:26, 3.94it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.473, 'learning_rate': 0.004572953736654804, 'epoch': 0.43}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 9%|▉ | 130/1405 
[00:40<05:21, 3.96it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.2682, 'learning_rate': 0.004537366548042704, 'epoch': 0.46}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 10%|▉ | 140/1405 [00:42<05:22, 3.93it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.3435, 'learning_rate': 0.004501779359430605, 'epoch': 0.5}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 11%|█ | 150/1405 [00:45<05:21, 3.90it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.3911, 'learning_rate': 0.004466192170818505, 'epoch': 0.53}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 11%|█▏ | 160/1405 [00:47<05:03, 4.11it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.2626, 'learning_rate': 0.004430604982206406, 'epoch': 0.57}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 12%|█▏ | 170/1405 [00:50<04:45, 4.32it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.4019, 'learning_rate': 0.004395017793594306, 'epoch': 0.6}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 13%|█▎ | 180/1405 [00:52<04:49, 4.24it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.3315, 'learning_rate': 0.004359430604982207, 'epoch': 0.64}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 14%|█▎ | 190/1405 [00:54<04:55, 4.11it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.3978, 'learning_rate': 0.004323843416370107, 'epoch': 0.68}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 14%|█▍ | 200/1405 [00:57<05:03, 3.97it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.3234, 'learning_rate': 0.004288256227758008, 'epoch': 0.71}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 15%|█▍ | 210/1405 [01:00<05:08, 3.87it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.4354, 'learning_rate': 0.004252669039145908, 'epoch': 0.75}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 16%|█▌ | 220/1405 [01:03<04:59, 3.96it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.3388, 'learning_rate': 0.004217081850533808, 'epoch': 0.78}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 16%|█▋ | 231/1405 [01:05<04:47, 4.09it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.3434, 'learning_rate': 0.004181494661921708, 'epoch': 0.82}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 17%|█▋ | 241/1405 [01:08<04:14, 4.57it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.2394, 'learning_rate': 0.004145907473309608, 'epoch': 0.85}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 18%|█▊ | 251/1405 [01:10<04:10, 4.60it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.2386, 'learning_rate': 0.004110320284697509, 'epoch': 0.89}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 19%|█▊ | 261/1405 [01:12<04:03, 4.69it/s]" - ] - }, - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "{'loss': 0.4592, 'learning_rate': 0.004074733096085409, 'epoch': 0.93}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 19%|█▉ | 271/1405 [01:14<04:08, 4.57it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.5556, 'learning_rate': 0.004039145907473309, 'epoch': 0.96}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 20%|██ | 281/1405 [01:16<03:59, 4.70it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.3026, 'learning_rate': 0.00400355871886121, 'epoch': 1.0}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - " 0%| | 0/31 [00:00 is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.\n" - ] - }, - { - "data": { - "text/plain": [ - "ViTImageProcessor {\n", - " \"do_normalize\": true,\n", - " \"do_rescale\": true,\n", - " \"do_resize\": true,\n", - " \"image_mean\": [\n", - " 0.5,\n", - " 0.5,\n", - " 0.5\n", - " ],\n", - " \"image_processor_type\": \"ViTImageProcessor\",\n", - " \"image_std\": [\n", - " 0.5,\n", - " 0.5,\n", - " 0.5\n", - " ],\n", - " \"resample\": 2,\n", - " \"rescale_factor\": 0.00392156862745098,\n", - " \"size\": {\n", - " \"height\": 224,\n", - " \"width\": 224\n", - " }\n", - "}" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from transformers import AutoImageProcessor\n", - "\n", - "image_processor = AutoImageProcessor.from_pretrained(model_checkpoint)\n", - "image_processor" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "from torchvision.transforms import (\n", - " CenterCrop,\n", - " Compose,\n", - " Normalize,\n", - " RandomHorizontalFlip,\n", - " RandomResizedCrop,\n", - " Resize,\n", - " ToTensor,\n", - ")\n", - "\n", - "normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)\n", - "train_transforms = Compose(\n", - " [\n", - " RandomResizedCrop(image_processor.size[\"height\"]),\n", - " RandomHorizontalFlip(),\n", - " ToTensor(),\n", - " normalize,\n", - " ]\n", - ")\n", - "\n", - "val_transforms = Compose(\n", - " [\n", - " Resize(image_processor.size[\"height\"]),\n", - " CenterCrop(image_processor.size[\"height\"]),\n", - " ToTensor(),\n", - " normalize,\n", - " ]\n", - ")\n", - "\n", - "\n", - "def preprocess_train(example_batch):\n", - " \"\"\"Apply train_transforms across a batch.\"\"\"\n", - " example_batch[\"pixel_values\"] = [train_transforms(image.convert(\"RGB\")) for image in example_batch[\"image\"]]\n", - " return example_batch\n", - "\n", - "\n", - "def preprocess_val(example_batch):\n", - " \"\"\"Apply val_transforms across a batch.\"\"\"\n", - " example_batch[\"pixel_values\"] = [val_transforms(image.convert(\"RGB\")) for image in example_batch[\"image\"]]\n", - " return example_batch" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# split up training into training + validation\n", - "splits = dataset.train_test_split(test_size=0.1)\n", - "train_ds = splits[\"train\"]\n", - "val_ds = splits[\"test\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "train_ds.set_transform(preprocess_train)\n", - "val_ds.set_transform(preprocess_val)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - 
"metadata": {}, - "outputs": [], - "source": [ - "def print_trainable_parameters(model):\n", - " \"\"\"\n", - " Prints the number of trainable parameters in the model.\n", - " \"\"\"\n", - " trainable_params = 0\n", - " all_param = 0\n", - " for _, param in model.named_parameters():\n", - " all_param += param.numel()\n", - " if param.requires_grad:\n", - " trainable_params += param.numel()\n", - " print(\n", - " f\"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param:.2f}\"\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" - ] - }, - { - "data": { - "text/plain": [ - "ViTForImageClassification(\n", - " (vit): ViTModel(\n", - " (embeddings): ViTEmbeddings(\n", - " (patch_embeddings): ViTPatchEmbeddings(\n", - " (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))\n", - " )\n", - " (dropout): Dropout(p=0.0, inplace=False)\n", - " )\n", - " (encoder): ViTEncoder(\n", - " (layer): ModuleList(\n", - " (0-11): 12 x ViTLayer(\n", - " (attention): ViTSdpaAttention(\n", - " (attention): ViTSdpaSelfAttention(\n", - " (query): Linear(in_features=768, out_features=768, bias=True)\n", - " (key): Linear(in_features=768, out_features=768, bias=True)\n", - " (value): Linear(in_features=768, out_features=768, bias=True)\n", - " (dropout): Dropout(p=0.0, inplace=False)\n", - " )\n", - " (output): ViTSelfOutput(\n", - " (dense): Linear(in_features=768, out_features=768, bias=True)\n", - " (dropout): Dropout(p=0.0, inplace=False)\n", - " )\n", - " )\n", - " (intermediate): ViTIntermediate(\n", - " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", - " (intermediate_act_fn): GELUActivation()\n", - " )\n", - " (output): ViTOutput(\n", - " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", - " (dropout): Dropout(p=0.0, inplace=False)\n", - " )\n", - " (layernorm_before): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (layernorm_after): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " )\n", - " )\n", - " )\n", - " (layernorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " )\n", - " (classifier): Linear(in_features=768, out_features=101, bias=True)\n", - ")" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from transformers import AutoModelForImageClassification, TrainingArguments, Trainer\n", - "\n", - "model = AutoModelForImageClassification.from_pretrained(\n", - " model_checkpoint,\n", - " label2id=label2id,\n", - " id2label=id2label,\n", - " ignore_mismatched_sizes=True, # provide this in case you're planning to fine-tune an already fine-tuned checkpoint\n", - ")\n", - "# print_trainable_parameters(model)\n", - "model" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "trainable params: 667493 || all params: 86543818 || trainable%: 0.77\n" - ] - } - ], - "source": [ - "from peft import LoraConfig, get_peft_model\n", - "\n", - "config = LoraConfig(\n", - " 
r=16,\n", - " lora_alpha=16,\n", - " target_modules=[\"query\", \"value\"],\n", - " lora_dropout=0.1,\n", - " bias=\"none\",\n", - " modules_to_save=[\"classifier\"],\n", - ")\n", - "lora_model = get_peft_model(model, config)\n", - "print_trainable_parameters(lora_model)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "from transformers import TrainingArguments, Trainer\n", - "\n", - "\n", - "model_name = model_checkpoint.split(\"/\")[-1]\n", - "batch_size = 128\n", - "\n", - "args = TrainingArguments(\n", - " f\"{model_name}-finetuned-lora-food101\",\n", - " remove_unused_columns=False,\n", - " eval_strategy=\"epoch\",\n", - " save_strategy=\"epoch\",\n", - " learning_rate=5e-3,\n", - " per_device_train_batch_size=batch_size,\n", - " gradient_accumulation_steps=4,\n", - " per_device_eval_batch_size=batch_size,\n", - " fp16=True,\n", - " num_train_epochs=5,\n", - " logging_steps=10,\n", - " load_best_model_at_end=True,\n", - " metric_for_best_model=\"accuracy\",\n", - " push_to_hub=True,\n", - " label_names=[\"labels\"],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import evaluate\n", - "\n", - "metric = evaluate.load(\"accuracy\")\n", - "\n", - "\n", - "# the compute_metrics function takes a Named Tuple as input:\n", - "# predictions, which are the logits of the model as Numpy arrays,\n", - "# and label_ids, which are the ground-truth labels as Numpy arrays.\n", - "def compute_metrics(eval_pred):\n", - " \"\"\"Computes accuracy on a batch of predictions\"\"\"\n", - " predictions = np.argmax(eval_pred.predictions, axis=1)\n", - " return metric.compute(predictions=predictions, references=eval_pred.label_ids)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "\n", - "\n", - "def collate_fn(examples):\n", - " pixel_values = torch.stack([example[\"pixel_values\"] for example in examples])\n", - " labels = torch.tensor([example[\"label\"] for example in examples])\n", - " return {\"pixel_values\": pixel_values, \"labels\": labels}" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\19895\\AppData\\Local\\conda\\conda\\envs\\tg\\lib\\site-packages\\accelerate\\accelerator.py:494: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", - " self.scaler = torch.cuda.amp.GradScaler(**kwargs)\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "5b7c23601ba246aebd325ef408f8f0b4", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/45 [00:00