Skip to content

Commit

Permalink
clear notebook outputs
Browse files Browse the repository at this point in the history
  • Loading branch information
lakshith-403 committed Jul 31, 2024
1 parent 77d00f0 commit bc32b50
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 178 deletions.
75 changes: 12 additions & 63 deletions labml_nn/transformers/LoRA/experiment.ipynb
Original file line number Diff line number Diff line change
@@ -1,28 +1,18 @@
{
"cells": [
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-31T12:22:57.496965Z",
"start_time": "2024-07-31T12:22:55.151730Z"
}
},
"metadata": {},
"cell_type": "code",
"source": [
"from labml_nn.transformers.LoRA.GPT2 import GPTModel\n",
"import torch"
],
"id": "cffa3ec341b4905a",
"outputs": [],
"execution_count": 1
"execution_count": null
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-31T12:22:57.986397Z",
"start_time": "2024-07-31T12:22:57.498305Z"
}
},
"metadata": {},
"cell_type": "code",
"source": [
"from transformers import AutoTokenizer\n",
Expand All @@ -31,17 +21,13 @@
],
"id": "c2b0b7e18394ea9e",
"outputs": [],
"execution_count": 2
"execution_count": null
},
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2024-07-31T12:22:58.562136Z",
"start_time": "2024-07-31T12:22:57.987296Z"
}
"collapsed": true
},
"source": [
"model = GPTModel()\n",
Expand All @@ -54,32 +40,11 @@
"if unexpected_keys:\n",
" print(f\"Unexpected keys: {unexpected_keys}\")"
],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_7130/2581223434.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
" state_dict = torch.load('transformed.pth')\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Missing keys: ['token_embedding.lora_a', 'token_embedding.lora_b', 'position_embedding.lora_a', 'position_embedding.lora_b', 'blocks.0.attn.c_att.lora_a', 'blocks.0.attn.c_att.lora_b', 'blocks.0.attn.c_proj.lora_a', 'blocks.0.attn.c_proj.lora_b', 'blocks.0.ffn.c_fc.lora_a', 'blocks.0.ffn.c_fc.lora_b', 'blocks.0.ffn.c_proj.lora_a', 'blocks.0.ffn.c_proj.lora_b', 'blocks.1.attn.c_att.lora_a', 'blocks.1.attn.c_att.lora_b', 'blocks.1.attn.c_proj.lora_a', 'blocks.1.attn.c_proj.lora_b', 'blocks.1.ffn.c_fc.lora_a', 'blocks.1.ffn.c_fc.lora_b', 'blocks.1.ffn.c_proj.lora_a', 'blocks.1.ffn.c_proj.lora_b', 'blocks.2.attn.c_att.lora_a', 'blocks.2.attn.c_att.lora_b', 'blocks.2.attn.c_proj.lora_a', 'blocks.2.attn.c_proj.lora_b', 'blocks.2.ffn.c_fc.lora_a', 'blocks.2.ffn.c_fc.lora_b', 'blocks.2.ffn.c_proj.lora_a', 'blocks.2.ffn.c_proj.lora_b', 'blocks.3.attn.c_att.lora_a', 'blocks.3.attn.c_att.lora_b', 'blocks.3.attn.c_proj.lora_a', 'blocks.3.attn.c_proj.lora_b', 'blocks.3.ffn.c_fc.lora_a', 'blocks.3.ffn.c_fc.lora_b', 'blocks.3.ffn.c_proj.lora_a', 'blocks.3.ffn.c_proj.lora_b', 'blocks.4.attn.c_att.lora_a', 'blocks.4.attn.c_att.lora_b', 'blocks.4.attn.c_proj.lora_a', 'blocks.4.attn.c_proj.lora_b', 'blocks.4.ffn.c_fc.lora_a', 'blocks.4.ffn.c_fc.lora_b', 'blocks.4.ffn.c_proj.lora_a', 'blocks.4.ffn.c_proj.lora_b', 'blocks.5.attn.c_att.lora_a', 'blocks.5.attn.c_att.lora_b', 'blocks.5.attn.c_proj.lora_a', 'blocks.5.attn.c_proj.lora_b', 'blocks.5.ffn.c_fc.lora_a', 'blocks.5.ffn.c_fc.lora_b', 'blocks.5.ffn.c_proj.lora_a', 'blocks.5.ffn.c_proj.lora_b', 'blocks.6.attn.c_att.lora_a', 'blocks.6.attn.c_att.lora_b', 'blocks.6.attn.c_proj.lora_a', 'blocks.6.attn.c_proj.lora_b', 'blocks.6.ffn.c_fc.lora_a', 'blocks.6.ffn.c_fc.lora_b', 'blocks.6.ffn.c_proj.lora_a', 'blocks.6.ffn.c_proj.lora_b', 'blocks.7.attn.c_att.lora_a', 'blocks.7.attn.c_att.lora_b', 'blocks.7.attn.c_proj.lora_a', 'blocks.7.attn.c_proj.lora_b', 'blocks.7.ffn.c_fc.lora_a', 'blocks.7.ffn.c_fc.lora_b', 'blocks.7.ffn.c_proj.lora_a', 
'blocks.7.ffn.c_proj.lora_b', 'blocks.8.attn.c_att.lora_a', 'blocks.8.attn.c_att.lora_b', 'blocks.8.attn.c_proj.lora_a', 'blocks.8.attn.c_proj.lora_b', 'blocks.8.ffn.c_fc.lora_a', 'blocks.8.ffn.c_fc.lora_b', 'blocks.8.ffn.c_proj.lora_a', 'blocks.8.ffn.c_proj.lora_b', 'blocks.9.attn.c_att.lora_a', 'blocks.9.attn.c_att.lora_b', 'blocks.9.attn.c_proj.lora_a', 'blocks.9.attn.c_proj.lora_b', 'blocks.9.ffn.c_fc.lora_a', 'blocks.9.ffn.c_fc.lora_b', 'blocks.9.ffn.c_proj.lora_a', 'blocks.9.ffn.c_proj.lora_b', 'blocks.10.attn.c_att.lora_a', 'blocks.10.attn.c_att.lora_b', 'blocks.10.attn.c_proj.lora_a', 'blocks.10.attn.c_proj.lora_b', 'blocks.10.ffn.c_fc.lora_a', 'blocks.10.ffn.c_fc.lora_b', 'blocks.10.ffn.c_proj.lora_a', 'blocks.10.ffn.c_proj.lora_b', 'blocks.11.attn.c_att.lora_a', 'blocks.11.attn.c_att.lora_b', 'blocks.11.attn.c_proj.lora_a', 'blocks.11.attn.c_proj.lora_b', 'blocks.11.ffn.c_fc.lora_a', 'blocks.11.ffn.c_fc.lora_b', 'blocks.11.ffn.c_proj.lora_a', 'blocks.11.ffn.c_proj.lora_b', 'lm_head.lora_a', 'lm_head.lora_b']\n"
]
}
],
"execution_count": 3
"outputs": [],
"execution_count": null
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-31T12:23:00.447976Z",
"start_time": "2024-07-31T12:22:58.566527Z"
}
},
"metadata": {},
"cell_type": "code",
"source": [
"prompt = \"hello how are you\"\n",
Expand All @@ -96,32 +61,16 @@
" print(tokenizer.decode(id))"
],
"id": "f4f7826ec3729b66",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
",\n",
" to\n",
" you\n",
" doing\n"
]
}
],
"execution_count": 4
"outputs": [],
"execution_count": null
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-31T12:23:00.452060Z",
"start_time": "2024-07-31T12:23:00.448904Z"
}
},
"metadata": {},
"cell_type": "code",
"source": "",
"id": "c12776360008a974",
"outputs": [],
"execution_count": 4
"execution_count": null
}
],
"metadata": {
Expand Down
137 changes: 22 additions & 115 deletions labml_nn/transformers/LoRA/train.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,117 +7,75 @@
"collapsed": true,
"jupyter": {
"outputs_hidden": true
},
"ExecuteTime": {
"end_time": "2024-07-31T12:57:37.296030Z",
"start_time": "2024-07-31T12:57:37.292368Z"
}
},
   "source": "# !wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt",
"outputs": [],
"execution_count": 1
"execution_count": null
},
{
"cell_type": "code",
"id": "3b1e507015ba6b81",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-31T12:57:37.317651Z",
"start_time": "2024-07-31T12:57:37.313808Z"
}
},
"metadata": {},
"source": [
"with open('input.txt', 'r', encoding='utf-8') as f:\n",
" text = f.read()"
],
"outputs": [],
"execution_count": 2
"execution_count": null
},
{
"cell_type": "code",
"id": "ac8e51ae5bbfcae7",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-31T12:57:40.488939Z",
"start_time": "2024-07-31T12:57:37.319486Z"
}
},
"metadata": {},
"source": [
"from transformers import AutoTokenizer\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n",
"\n",
"tokens = tokenizer.encode(text, add_special_tokens=False)"
],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Token indices sequence length is longer than the specified maximum sequence length for this model (338025 > 1024). Running this sequence through the model will result in indexing errors\n"
]
}
],
"execution_count": 3
"outputs": [],
"execution_count": null
},
{
"cell_type": "code",
"id": "aeefcdf813e427e",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-31T12:57:40.495510Z",
"start_time": "2024-07-31T12:57:40.490341Z"
}
},
"metadata": {},
"source": [
"context_length = 512\n",
"batch_size = 2"
],
"outputs": [],
"execution_count": 4
"execution_count": null
},
{
"cell_type": "code",
"id": "a384b42274f008a2",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-31T12:57:40.522050Z",
"start_time": "2024-07-31T12:57:40.496842Z"
}
},
"metadata": {},
"source": [
"num_batches = len(tokens) // (batch_size * context_length)\n",
"tokens = tokens[:num_batches * batch_size * context_length]"
],
"outputs": [],
"execution_count": 5
"execution_count": null
},
{
"cell_type": "code",
"id": "5c4cc78ac1a02c1d",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-31T12:57:40.592272Z",
"start_time": "2024-07-31T12:57:40.524063Z"
}
},
"metadata": {},
"source": [
"import torch\n",
"\n",
"input_ids = torch.tensor(tokens).view(-1, context_length)"
],
"outputs": [],
"execution_count": 6
"execution_count": null
},
{
"cell_type": "code",
"id": "7037fd75e2161382",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-31T12:57:40.601199Z",
"start_time": "2024-07-31T12:57:40.593250Z"
}
},
"metadata": {},
"source": [
"from torch.utils.data import DataLoader, TensorDataset\n",
"from torch.optim import Adam\n",
Expand All @@ -137,17 +95,12 @@
"test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)"
],
"outputs": [],
"execution_count": 7
"execution_count": null
},
{
"cell_type": "code",
"id": "a98b7baa064b8494",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-31T12:57:41.577878Z",
"start_time": "2024-07-31T12:57:40.602187Z"
}
},
"metadata": {},
"source": [
"from labml_nn.transformers.LoRA.GPT2 import GPTModel\n",
"\n",
Expand All @@ -157,33 +110,23 @@
"_ = model.load_state_dict(state_dict, strict=False)"
],
"outputs": [],
"execution_count": 8
"execution_count": null
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-31T12:57:43.098187Z",
"start_time": "2024-07-31T12:57:41.578713Z"
}
},
"metadata": {},
"cell_type": "code",
"source": [
"device = \"cuda\"\n",
"model = model.to(device=\"cuda\")"
],
"id": "2e0fa8b3082df716",
"outputs": [],
"execution_count": 9
"execution_count": null
},
{
"cell_type": "code",
"id": "e2f5076894770740",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-31T12:57:57.044755Z",
"start_time": "2024-07-31T12:57:43.099050Z"
}
},
"metadata": {},
"source": [
"from labml import tracker, experiment\n",
"\n",
Expand Down Expand Up @@ -236,49 +179,13 @@
"\n",
"print(\"Training complete.\")"
],
"outputs": [
{
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"<pre style=\"overflow-x: scroll;\">\n",
"<strong><span style=\"text-decoration: underline\">LoRA.GPT2</span></strong>: <span style=\"color: #208FFB\">7a14822c4f3c11efad8354ef33f17c7c</span>\n",
"\t[dirty]: <strong><span style=\"color: #DDB62B\">\"training loop\"</span></strong>\n",
"<span style=\"color: #208FFB\">Monitor experiment at </span><a href='http://localhost:5005/run/7a14822c4f3c11efad8354ef33f17c7c' target='blank'>http://localhost:5005/run/7a14822c4f3c11efad8354ef33f17c7c</a>\n",
"<strong><span style=\"color: #DDB62B\">Still updating labml server, please wait for it to complete...</span></strong></pre>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[0;31mKeyboardInterrupt\u001B[0m Traceback (most recent call last)",
"Cell \u001B[0;32mIn[10], line 25\u001B[0m\n\u001B[1;32m 22\u001B[0m loss \u001B[38;5;241m=\u001B[39m criterion(shift_logits\u001B[38;5;241m.\u001B[39mreshape(\u001B[38;5;241m-\u001B[39m\u001B[38;5;241m1\u001B[39m, shift_logits\u001B[38;5;241m.\u001B[39msize(\u001B[38;5;241m-\u001B[39m\u001B[38;5;241m1\u001B[39m)), shift_labels\u001B[38;5;241m.\u001B[39mreshape(\u001B[38;5;241m-\u001B[39m\u001B[38;5;241m1\u001B[39m))\n\u001B[1;32m 24\u001B[0m optimizer\u001B[38;5;241m.\u001B[39mzero_grad()\n\u001B[0;32m---> 25\u001B[0m loss\u001B[38;5;241m.\u001B[39mbackward()\n\u001B[1;32m 26\u001B[0m optimizer\u001B[38;5;241m.\u001B[39mstep()\n\u001B[1;32m 28\u001B[0m tracker\u001B[38;5;241m.\u001B[39msave(step, {\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mloss\u001B[39m\u001B[38;5;124m'\u001B[39m: loss})\n",
"File \u001B[0;32m~/miniconda3/lib/python3.12/site-packages/torch/_tensor.py:521\u001B[0m, in \u001B[0;36mTensor.backward\u001B[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001B[0m\n\u001B[1;32m 511\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m has_torch_function_unary(\u001B[38;5;28mself\u001B[39m):\n\u001B[1;32m 512\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m handle_torch_function(\n\u001B[1;32m 513\u001B[0m Tensor\u001B[38;5;241m.\u001B[39mbackward,\n\u001B[1;32m 514\u001B[0m (\u001B[38;5;28mself\u001B[39m,),\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 519\u001B[0m inputs\u001B[38;5;241m=\u001B[39minputs,\n\u001B[1;32m 520\u001B[0m )\n\u001B[0;32m--> 521\u001B[0m torch\u001B[38;5;241m.\u001B[39mautograd\u001B[38;5;241m.\u001B[39mbackward(\n\u001B[1;32m 522\u001B[0m \u001B[38;5;28mself\u001B[39m, gradient, retain_graph, create_graph, inputs\u001B[38;5;241m=\u001B[39minputs\n\u001B[1;32m 523\u001B[0m )\n",
"File \u001B[0;32m~/miniconda3/lib/python3.12/site-packages/torch/autograd/__init__.py:289\u001B[0m, in \u001B[0;36mbackward\u001B[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001B[0m\n\u001B[1;32m 284\u001B[0m retain_graph \u001B[38;5;241m=\u001B[39m create_graph\n\u001B[1;32m 286\u001B[0m \u001B[38;5;66;03m# The reason we repeat the same comment below is that\u001B[39;00m\n\u001B[1;32m 287\u001B[0m \u001B[38;5;66;03m# some Python versions print out the first line of a multi-line function\u001B[39;00m\n\u001B[1;32m 288\u001B[0m \u001B[38;5;66;03m# calls in the traceback and some print out the last line\u001B[39;00m\n\u001B[0;32m--> 289\u001B[0m _engine_run_backward(\n\u001B[1;32m 290\u001B[0m tensors,\n\u001B[1;32m 291\u001B[0m grad_tensors_,\n\u001B[1;32m 292\u001B[0m retain_graph,\n\u001B[1;32m 293\u001B[0m create_graph,\n\u001B[1;32m 294\u001B[0m inputs,\n\u001B[1;32m 295\u001B[0m allow_unreachable\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m,\n\u001B[1;32m 296\u001B[0m accumulate_grad\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m,\n\u001B[1;32m 297\u001B[0m )\n",
"File \u001B[0;32m~/miniconda3/lib/python3.12/site-packages/torch/autograd/graph.py:768\u001B[0m, in \u001B[0;36m_engine_run_backward\u001B[0;34m(t_outputs, *args, **kwargs)\u001B[0m\n\u001B[1;32m 766\u001B[0m unregister_hooks \u001B[38;5;241m=\u001B[39m _register_logging_hooks_on_whole_graph(t_outputs)\n\u001B[1;32m 767\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m--> 768\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m Variable\u001B[38;5;241m.\u001B[39m_execution_engine\u001B[38;5;241m.\u001B[39mrun_backward( \u001B[38;5;66;03m# Calls into the C++ engine to run the backward pass\u001B[39;00m\n\u001B[1;32m 769\u001B[0m t_outputs, \u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs\n\u001B[1;32m 770\u001B[0m ) \u001B[38;5;66;03m# Calls into the C++ engine to run the backward pass\u001B[39;00m\n\u001B[1;32m 771\u001B[0m \u001B[38;5;28;01mfinally\u001B[39;00m:\n\u001B[1;32m 772\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m attach_logging_hooks:\n",
"\u001B[0;31mKeyboardInterrupt\u001B[0m: "
]
}
],
"execution_count": 10
"outputs": [],
"execution_count": null
},
{
"cell_type": "code",
"id": "da2d4023002648dc",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-31T12:57:57.046254Z",
"start_time": "2024-07-31T12:57:57.045954Z"
}
},
"metadata": {},
"source": [],
"outputs": [],
"execution_count": null
Expand Down

0 comments on commit bc32b50

Please sign in to comment.