From 4d7a84d4af2acb158449722b179e74d7f9027dc1 Mon Sep 17 00:00:00 2001
From: ruthashford-mids <ruth.ashford@berkeley.edu>
Date: Thu, 28 Jul 2022 22:26:25 -0700
Subject: [PATCH] removing old roBERTa file

---
 roBERTa.ipynb | 6735 -------------------------------------------------
 1 file changed, 6735 deletions(-)
 delete mode 100644 roBERTa.ipynb

diff --git a/roBERTa.ipynb b/roBERTa.ipynb
deleted file mode 100644
index 5d7049e..0000000
--- a/roBERTa.ipynb
+++ /dev/null
@@ -1,6735 +0,0 @@
-{
-  "cells": [
-    {
-      "cell_type": "code",
-      "execution_count": 1,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "YXAL6gpkijkz",
-        "outputId": "eee26d43-dc2b-4bb0-b1d9-785fecda066f"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "\u001b[K     |████████████████████████████████| 21.6 MB 4.9 MB/s \n",
-            "\u001b[K     |████████████████████████████████| 4.6 MB 5.1 MB/s \n",
-            "\u001b[K     |████████████████████████████████| 511.7 MB 4.9 kB/s \n",
-            "\u001b[K     |████████████████████████████████| 438 kB 88.4 MB/s \n",
-            "\u001b[K     |████████████████████████████████| 1.6 MB 62.5 MB/s \n",
-            "\u001b[K     |████████████████████████████████| 5.8 MB 88.6 MB/s \n",
-            "\u001b[K     |████████████████████████████████| 2.1 MB 5.0 MB/s \n",
-            "\u001b[K     |████████████████████████████████| 43 kB 2.7 MB/s \n",
-            "\u001b[K     |████████████████████████████████| 1.1 MB 56.2 MB/s \n",
-            "\u001b[K     |████████████████████████████████| 1.2 MB 68.6 MB/s \n",
-            "\u001b[K     |████████████████████████████████| 116 kB 94.9 MB/s \n",
-            "\u001b[K     |████████████████████████████████| 636 kB 72.6 MB/s \n",
-            "\u001b[K     |████████████████████████████████| 352 kB 80.9 MB/s \n",
-            "\u001b[K     |████████████████████████████████| 99 kB 12.3 MB/s \n",
-            "\u001b[K     |████████████████████████████████| 238 kB 93.6 MB/s \n",
-            "\u001b[?25h  Building wheel for py-cpuinfo (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "  Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "\u001b[K     |████████████████████████████████| 24.2 MB 5.1 MB/s \n",
-            "\u001b[K     |████████████████████████████████| 4.7 MB 5.1 MB/s \n",
-            "\u001b[K     |████████████████████████████████| 6.6 MB 59.6 MB/s \n",
-            "\u001b[K     |████████████████████████████████| 101 kB 14.9 MB/s \n",
-            "\u001b[?25h"
-          ]
-        }
-      ],
-      "source": [
-        "!pip uninstall -y opencv-python --quiet\n",
-        "!pip install \"opencv-python-headless<4.3\" --quiet\n",
-        "!pip install -U \"tensorflow-text==2.9.*\" --quiet\n",
-        "!pip install tf-models-official --quiet\n",
-        "!pip install keras-metrics --quiet\n",
-        "!pip install gensim==3.8.3 --quiet\n",
-        "!pip install pydot --quiet\n",
-        "!pip install sentencepiece --quiet\n",
-        "!pip install transformers --quiet"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 2,
-      "metadata": {
-        "id": "Kd0xo5RHVbg-"
-      },
-      "outputs": [],
-      "source": [
-        "from google.cloud import storage\n",
-        "import google.oauth2.credentials\n",
-        "import json\n",
-        "import seaborn as sns\n",
-        "\n",
-        "import numpy as np\n",
-        "import pandas as pd\n",
-        "import tensorflow as tf\n",
-        "from tensorflow import keras\n",
-        "from tensorflow.keras import metrics\n",
-        "import tensorflow_models as tfm\n",
-        "\n",
-        "from tensorflow.keras.layers import Embedding, Input, Dense, Lambda\n",
-        "from tensorflow.keras.models import Model\n",
-        "import tensorflow.keras.backend as K\n",
-        "import tensorflow_datasets as tfds\n",
-        "#from keras.preprocessing.sequence import pad_sequences\n",
-        "import torch\n",
-        "\n",
-        "import sklearn as sk\n",
-        "import nltk\n",
-        "from nltk.corpus import reuters\n",
-        "from nltk.data import find\n",
-        "\n",
-        "import matplotlib.pyplot as plt\n",
-        "\n",
-        "import re\n",
-        "\n",
-        "#This continues to work with gensim 3.8.3.  It doesn't yet work with 4.x.  \n",
-        "#Make sure your pip install command specifies gensim==3.8.3\n",
-        "import gensim\n",
-        "\n",
-        "from transformers import BertTokenizer, TFBertModel, XLNetTokenizer, TFXLNetForSequenceClassification, TFBertForSequenceClassification, TFRobertaForSequenceClassification, RobertaTokenizer\n",
-        "\n",
-        "\n",
-        "from sklearn.metrics import classification_report\n",
-        "from sklearn.model_selection import train_test_split\n",
-        "\n"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "ouPkZabHYvwR"
-      },
-      "source": [
-        "## Data loading"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 3,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "3Umy49AUj-QH",
-        "outputId": "11dfd774-a723-4321-8fc2-29023de36425"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Mounted at /content/drive\n"
-          ]
-        }
-      ],
-      "source": [
-        "# Access drive \n",
-        "from google.colab import drive\n",
-        "drive.mount('/content/drive')"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "train = pd.read_csv(\"/content/drive/My Drive/Colab Notebooks/W266 Project/data/yelp_train.csv\")\n",
-        "test = pd.read_csv(\"/content/drive/My Drive/Colab Notebooks/W266 Project/data/yelp_test.csv\")\n",
-        "valid = pd.read_csv(\"/content/drive/My Drive/Colab Notebooks/W266 Project/data/yelp_valid.csv\")"
-      ],
-      "metadata": {
-        "id": "RuHXI9hByBSX"
-      },
-      "execution_count": 4,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "x_train = list(train[['text']].text)\n",
-        "y_train = np.asarray(train[['label']].label)\n",
-        "\n",
-        "x_test = list(test[['text']].text)\n",
-        "y_test = np.asarray(test[['label']].label)\n",
-        "\n",
-        "x_valid = list(valid[['text']].text)\n",
-        "y_valid = np.asarray(valid[['label']].label)"
-      ],
-      "metadata": {
-        "id": "WZRn3TO4yBim"
-      },
-      "execution_count": 5,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "## Roberta"
-      ],
-      "metadata": {
-        "id": "b1pQFKyNyha4"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "roberta_tokenizer = RobertaTokenizer.from_pretrained(\"roberta-base\")"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 113,
-          "referenced_widgets": [
-            "03d829ad4c0a419d9f75ed02c10b243e",
-            "3f87aa1685a647bb91445fd4716b8104",
-            "2ce390869afa4ae6ba4389319eddb44b",
-            "a82c381fcbc743a1a110cb7f15426373",
-            "0c547845eac94a57be770f3e5a3be938",
-            "930eb246751e49b78a358407c0f41c71",
-            "22b4ab47cfda43cda17b58583771ba9b",
-            "e033dcd9a49a485c8cbb28475703975b",
-            "895da1e0a44e434e876d1f43046c7fe2",
-            "b1f1a7fb3bc24ca0b10d3702c3bba1d2",
-            "14e048cd8ed041a99ed0019bdd900d19",
-            "d706b56424224b47897ac2ee0c0b2442",
-            "6b732f60756b46ecbeb96c737f4496b4",
-            "d21e7d26b7c94843903096fae3008538",
-            "88e4813027b148a9997d46ca01b01089",
-            "3ea2594afd164aae81d90acc80a451f1",
-            "a9a95b664df94256a8977e656716219a",
-            "0e0e076722cf467c8ba2c48ed7a421a5",
-            "d121d804a688426fa389a75348c9f827",
-            "e4400fc6e334470b883895b61c3d6037",
-            "493b1d7528604a9799db026ff8fe4feb",
-            "a1d6e88d6b7c4041afc2d4db02040233",
-            "7330000b9a704aebaeed3f67742208e6",
-            "09d57f7ca0e04c94bd803a075f7f91d9",
-            "314cf1ae89ab4afeb11f1de84a2564eb",
-            "ea1cbc99ccb0453db30f7fd735a9f2e5",
-            "5716bfe94d8547c99e45709090059659",
-            "548bd11142854b12b1537c9726690c1c",
-            "92060595691e42d382a281fb4097311e",
-            "9eefdd930bf94d22bc4ba038d3aa86ff",
-            "8bef2c5a8ee0450e95af02c3e6435902",
-            "00665f4b6a174be5b3be13831c4f274b",
-            "a7ffcd3cc547410bbbeadfde21545bf4"
-          ]
-        },
-        "id": "aU1dhDMtdWws",
-        "outputId": "79884882-f62e-40ff-80c4-f31cb53aed6c"
-      },
-      "execution_count": 6,
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "Downloading vocab.json:   0%|          | 0.00/878k [00:00<?, ?B/s]"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "03d829ad4c0a419d9f75ed02c10b243e"
-            }
-          },
-          "metadata": {}
-        },
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "Downloading merges.txt:   0%|          | 0.00/446k [00:00<?, ?B/s]"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "d706b56424224b47897ac2ee0c0b2442"
-            }
-          },
-          "metadata": {}
-        },
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "Downloading config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "7330000b9a704aebaeed3f67742208e6"
-            }
-          },
-          "metadata": {}
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "def create_roberta_model(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5,epsilon=1e-08)):\n",
-        "    \"\"\"Create a roBERTa model based on the roBERTa paper:\n",
-        "    https://arxiv.org/pdf/1907.11692.pdf \n",
-        "\n",
-        "        - model: TFRobertaForSequenceClassification\n",
-        "    \"\"\"\n",
-        "    roberta_model = TFRobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)\n",
-        "\n",
-        "    # Freeze all layers except the last 4 which are the pooled classification layers:\n",
-        "    untrainable = [w.name for w in roberta_model.weights[:-4]]\n",
-        "    trainable = [w.name for w in roberta_model.weights[-4:]]\n",
-        "\n",
-        "    for w in roberta_model.weights:\n",
-        "        if w.name in untrainable:\n",
-        "            w._trainable = False\n",
-        "        elif w.name in trainable:\n",
-        "            w._trainable = True\n",
-        "\n",
-        "    # Compile the model:\n",
-        "    roberta_model.compile(\n",
-        "        optimizer = optimizer,\n",
-        "        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), \n",
-        "        metrics = [tf.keras.metrics.SparseCategoricalAccuracy(\"accuracy\")]\n",
-        "    )\n",
-        "\n",
-        "    return roberta_model"
-      ],
-      "metadata": {
-        "id": "IOJQDQHqgfMI"
-      },
-      "execution_count": 7,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "def tokenize(length, data, tokenizer):\n",
-        "  \"\"\"Tokenize text using specified tokenizer with the constraint of max_length\n",
-        "    - length: max length of tokenized output\n",
-        "    - data: text to tokenize\n",
-        "    - tokenizer\n",
-        "  \"\"\"\n",
-        "  encodings = tokenizer(data, \n",
-        "                max_length=length,\n",
-        "                truncation=True,\n",
-        "                padding='max_length', \n",
-        "                return_tensors='tf')\n",
-        "  return encodings\n",
-        "\n",
-        "def run_roberta(length, tokenizer, model_name, x_train, y_train, x_valid, y_valid, x_test, y_test, optimizer=None):\n",
-        "  \"\"\" Tokenizes, trains and evaluates roBERTa models for different max_lengths\n",
-        "  \"\"\"\n",
-        "  print(f\"This model will be saved as {model_name}\")\n",
-        "  print(f'Running roBERTa for encoding max_length: {length}')\n",
-        "  print('Tokenizing data...')\n",
-        "  train_encodings_roberta = tokenize(length, x_train, tokenizer)\n",
-        "  valid_encodings_roberta = tokenize(length, x_valid, tokenizer)\n",
-        "  test_encodings_roberta = tokenize(length, x_test, tokenizer)\n",
-        "\n",
-        "  print(f'Created encoding for training data with shape {train_encodings_roberta.input_ids.shape}')\n",
-        "  print(f'Created encoding for validation data with shape {valid_encodings_roberta.input_ids.shape}')\n",
-        "  print(f'Created encoding for test data with shape {test_encodings_roberta.input_ids.shape}')\n",
-        "\n",
-        "  if optimizer:\n",
-        "    print(\"Using custom optimizer\")\n",
-        "    model = create_roberta_model(optimizer=optimizer)\n",
-        "  else:\n",
-        "    model = create_roberta_model()\n",
-        "  print('Training model...')\n",
-        "  history = model.fit(\n",
-        "    [train_encodings_roberta.input_ids, train_encodings_roberta.attention_mask], \n",
-        "    y_train,\n",
-        "    validation_data=(\n",
-        "        [valid_encodings_roberta.input_ids, valid_encodings_roberta.attention_mask], \n",
-        "        y_valid\n",
-        "        ),\n",
-        "    batch_size=32, \n",
-        "    epochs=4\n",
-        "  )\n",
-        "\n",
-        "  print('Evaluating model...')\n",
-        "  score = model.evaluate([test_encodings_roberta.input_ids, test_encodings_roberta.attention_mask], y_test)\n",
-        "\n",
-        "  print(\"Test loss:\", score[0])\n",
-        "  print(\"Test accuracy:\", score[1])\n",
-        "\n",
-        "  predictions = model.predict([test_encodings_roberta.input_ids, test_encodings_roberta.attention_mask])\n",
-        "  preds = predictions.to_tuple()[0].argmax(1)\n",
-        "  print('\\n Classification Report:\\n')\n",
-        "  print(classification_report(y_test, preds))\n",
-        "\n",
-        "  model.save(\n",
-        "    str.format(\"/content/drive/My Drive/models/Project W266/{name}\", name = model_name),\n",
-        "    overwrite=True,\n",
-        "    include_optimizer=True,\n",
-        "    save_format=None,\n",
-        "    signatures=None,\n",
-        "    options=None,\n",
-        "    save_traces=True\n",
-        "  )\n"
-      ],
-      "metadata": {
-        "id": "qMRC9B0RZLbO"
-      },
-      "execution_count": 8,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "### Running for various lengths of embeddings"
-      ],
-      "metadata": {
-        "id": "8YHIQu-2vybh"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "max_lengths = [64,128,256,320,384,448,512]\n",
-        "for length in max_lengths:\n",
-        "  run_roberta(length, roberta_tokenizer, str.format('roberta_model_{length}', length = length), \n",
-        "                                                    x_train = x_train,\n",
-        "                                                    y_train = y_train,\n",
-        "                                                    x_valid = x_valid,\n",
-        "                                                    y_valid = y_valid,\n",
-        "                                                    x_test = x_test,\n",
-        "                                                    y_test = y_test)"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "oQ5nBUvuXY76",
-        "outputId": "4d198942-24e8-4db7-be15-806593e27021"
-      },
-      "execution_count": null,
-      "outputs": [
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Running roBERTa for encoding max_length: 64\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 64)\n",
-            "Created encoding for validation data with shape (5893, 64)\n",
-            "Created encoding for test data with shape (5894, 64)\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "1474/1474 [==============================] - 170s 111ms/step - loss: 0.6682 - accuracy: 0.6166 - val_loss: 0.6404 - val_accuracy: 0.6959\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 161s 109ms/step - loss: 0.6302 - accuracy: 0.6747 - val_loss: 0.6023 - val_accuracy: 0.7168\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 161s 109ms/step - loss: 0.6068 - accuracy: 0.6947 - val_loss: 0.5813 - val_accuracy: 0.7175\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 161s 110ms/step - loss: 0.5971 - accuracy: 0.7027 - val_loss: 0.5661 - val_accuracy: 0.7310\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 17s 93ms/step - loss: 0.5791 - accuracy: 0.7128\n",
-            "Test loss: 0.579146146774292\n",
-            "Test accuracy: 0.7127587199211121\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.73      0.70      0.71      2992\n",
-            "           1       0.70      0.73      0.71      2902\n",
-            "\n",
-            "    accuracy                           0.71      5894\n",
-            "   macro avg       0.71      0.71      0.71      5894\n",
-            "weighted avg       0.71      0.71      0.71      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_64/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_64/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Running roBERTa for encoding max_length: 128\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 128)\n",
-            "Created encoding for validation data with shape (5893, 128)\n",
-            "Created encoding for test data with shape (5894, 128)\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "1474/1474 [==============================] - 311s 207ms/step - loss: 0.6516 - accuracy: 0.6479 - val_loss: 0.6158 - val_accuracy: 0.7147\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 301s 204ms/step - loss: 0.6063 - accuracy: 0.7033 - val_loss: 0.5769 - val_accuracy: 0.7417\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 302s 205ms/step - loss: 0.5807 - accuracy: 0.7279 - val_loss: 0.5515 - val_accuracy: 0.7528\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 302s 205ms/step - loss: 0.5669 - accuracy: 0.7367 - val_loss: 0.5378 - val_accuracy: 0.7545\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 32s 171ms/step - loss: 0.5464 - accuracy: 0.7465\n",
-            "Test loss: 0.5463767051696777\n",
-            "Test accuracy: 0.7465218901634216\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.73      0.80      0.76      2992\n",
-            "           1       0.77      0.70      0.73      2902\n",
-            "\n",
-            "    accuracy                           0.75      5894\n",
-            "   macro avg       0.75      0.75      0.75      5894\n",
-            "weighted avg       0.75      0.75      0.75      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_128/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_128/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Running roBERTa for encoding max_length: 256\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 256)\n",
-            "Created encoding for validation data with shape (5893, 256)\n",
-            "Created encoding for test data with shape (5894, 256)\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "1474/1474 [==============================] - 616s 414ms/step - loss: 0.6490 - accuracy: 0.6703 - val_loss: 0.6157 - val_accuracy: 0.7524\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 608s 412ms/step - loss: 0.5963 - accuracy: 0.7340 - val_loss: 0.5648 - val_accuracy: 0.7592\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 607s 412ms/step - loss: 0.5658 - accuracy: 0.7466 - val_loss: 0.5341 - val_accuracy: 0.7668\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 607s 412ms/step - loss: 0.5511 - accuracy: 0.7503 - val_loss: 0.5157 - val_accuracy: 0.7750\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 62s 336ms/step - loss: 0.5287 - accuracy: 0.7677\n",
-            "Test loss: 0.5286672711372375\n",
-            "Test accuracy: 0.7677298784255981\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.78      0.76      0.77      2992\n",
-            "           1       0.76      0.78      0.77      2902\n",
-            "\n",
-            "    accuracy                           0.77      5894\n",
-            "   macro avg       0.77      0.77      0.77      5894\n",
-            "weighted avg       0.77      0.77      0.77      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_256/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_256/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Running roBERTa for encoding max_length: 320\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 320)\n",
-            "Created encoding for validation data with shape (5893, 320)\n",
-            "Created encoding for test data with shape (5894, 320)\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_7/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "1474/1474 [==============================] - 791s 533ms/step - loss: 0.6482 - accuracy: 0.6892 - val_loss: 0.6130 - val_accuracy: 0.7582\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 782s 531ms/step - loss: 0.5887 - accuracy: 0.7438 - val_loss: 0.5564 - val_accuracy: 0.7651\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 782s 531ms/step - loss: 0.5604 - accuracy: 0.7532 - val_loss: 0.5269 - val_accuracy: 0.7736\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 782s 531ms/step - loss: 0.5427 - accuracy: 0.7554 - val_loss: 0.5102 - val_accuracy: 0.7724\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 79s 430ms/step - loss: 0.5212 - accuracy: 0.7703\n",
-            "Test loss: 0.5211576223373413\n",
-            "Test accuracy: 0.7702748775482178\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.76      0.79      0.78      2992\n",
-            "           1       0.78      0.75      0.76      2902\n",
-            "\n",
-            "    accuracy                           0.77      5894\n",
-            "   macro avg       0.77      0.77      0.77      5894\n",
-            "weighted avg       0.77      0.77      0.77      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_320/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_320/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Running roBERTa for encoding max_length: 384\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 384)\n",
-            "Created encoding for validation data with shape (5893, 384)\n",
-            "Created encoding for test data with shape (5894, 384)\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_8/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "1474/1474 [==============================] - 982s 663ms/step - loss: 0.6472 - accuracy: 0.6916 - val_loss: 0.6131 - val_accuracy: 0.7612\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 975s 662ms/step - loss: 0.5905 - accuracy: 0.7461 - val_loss: 0.5576 - val_accuracy: 0.7684\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 975s 662ms/step - loss: 0.5599 - accuracy: 0.7529 - val_loss: 0.5266 - val_accuracy: 0.7719\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 976s 662ms/step - loss: 0.5438 - accuracy: 0.7550 - val_loss: 0.5101 - val_accuracy: 0.7746\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 99s 532ms/step - loss: 0.5214 - accuracy: 0.7715\n",
-            "Test loss: 0.521388590335846\n",
-            "Test accuracy: 0.7714625000953674\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.77      0.78      0.78      2992\n",
-            "           1       0.77      0.76      0.77      2902\n",
-            "\n",
-            "    accuracy                           0.77      5894\n",
-            "   macro avg       0.77      0.77      0.77      5894\n",
-            "weighted avg       0.77      0.77      0.77      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_384/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_384/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Running roBERTa for encoding max_length: 448\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 448)\n",
-            "Created encoding for validation data with shape (5893, 448)\n",
-            "Created encoding for test data with shape (5894, 448)\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_9/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "1474/1474 [==============================] - 1163s 785ms/step - loss: 0.6457 - accuracy: 0.6958 - val_loss: 0.6102 - val_accuracy: 0.7629\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 1155s 784ms/step - loss: 0.5877 - accuracy: 0.7482 - val_loss: 0.5546 - val_accuracy: 0.7679\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 1155s 784ms/step - loss: 0.5556 - accuracy: 0.7557 - val_loss: 0.5229 - val_accuracy: 0.7733\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 1154s 783ms/step - loss: 0.5434 - accuracy: 0.7557 - val_loss: 0.5081 - val_accuracy: 0.7752\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 115s 622ms/step - loss: 0.5193 - accuracy: 0.7706\n",
-            "Test loss: 0.5192633867263794\n",
-            "Test accuracy: 0.7706142067909241\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.77      0.79      0.78      2992\n",
-            "           1       0.77      0.75      0.76      2902\n",
-            "\n",
-            "    accuracy                           0.77      5894\n",
-            "   macro avg       0.77      0.77      0.77      5894\n",
-            "weighted avg       0.77      0.77      0.77      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_448/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_448/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Running roBERTa for encoding max_length: 512\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 512)\n",
-            "Created encoding for validation data with shape (5893, 512)\n",
-            "Created encoding for test data with shape (5894, 512)\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_10/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "1474/1474 [==============================] - 1383s 935ms/step - loss: 0.6452 - accuracy: 0.6976 - val_loss: 0.6134 - val_accuracy: 0.7256\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 1375s 933ms/step - loss: 0.5870 - accuracy: 0.7472 - val_loss: 0.5538 - val_accuracy: 0.7645\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 1375s 933ms/step - loss: 0.5583 - accuracy: 0.7512 - val_loss: 0.5237 - val_accuracy: 0.7719\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 1375s 933ms/step - loss: 0.5403 - accuracy: 0.7590 - val_loss: 0.5072 - val_accuracy: 0.7728\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 136s 735ms/step - loss: 0.5183 - accuracy: 0.7710\n",
-            "Test loss: 0.5182607769966125\n",
-            "Test accuracy: 0.7709535360336304\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.76      0.81      0.78      2992\n",
-            "           1       0.79      0.73      0.76      2902\n",
-            "\n",
-            "    accuracy                           0.77      5894\n",
-            "   macro avg       0.77      0.77      0.77      5894\n",
-            "weighted avg       0.77      0.77      0.77      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_512/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_512/assets\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "### Trying out a linear decay learning rate schedule with warmup"
-      ],
-      "metadata": {
-        "id": "fQ0Cv7Nuv4EZ"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "def create_learning_schedule(initial_learning_rate=2e-5, train_data_size=None,\n",
-        "                             epochs=4, batch_size=32, warmup_fraction=0.06):\n",
-        "  \"\"\"Build a learning rate schedule: linear warmup followed by linear decay.\n",
-        "\n",
-        "  Args:\n",
-        "    initial_learning_rate: starting rate handed to the PolynomialDecay schedule.\n",
-        "    train_data_size: number of training examples. When None, falls back to\n",
-        "      len(x_train), the notebook-level training set (the original behaviour).\n",
-        "    epochs: number of training epochs the schedule should span.\n",
-        "    batch_size: training batch size, used to derive the steps per epoch.\n",
-        "    warmup_fraction: share of the total training steps spent warming up.\n",
-        "\n",
-        "  Returns:\n",
-        "    A tfm LinearWarmup schedule wrapping the PolynomialDecay schedule.\n",
-        "\n",
-        "  Raises:\n",
-        "    ValueError: if batch_size is not a positive number.\n",
-        "  \"\"\"\n",
-        "  if batch_size <= 0:\n",
-        "    raise ValueError('batch_size must be positive, got {}'.format(batch_size))\n",
-        "  if train_data_size is None:\n",
-        "    train_data_size = len(x_train)\n",
-        "\n",
-        "  # Round up: training also runs the final partial batch of each epoch\n",
-        "  # (e.g. 47146 examples at batch size 32 is 1474 steps, not 1473).\n",
-        "  steps_per_epoch = -(-train_data_size // batch_size)\n",
-        "  num_train_steps = steps_per_epoch * epochs\n",
-        "  # Warm up over 6% of the training steps by default, as was done in the RoBERTa paper\n",
-        "  warmup_steps = int(warmup_fraction * num_train_steps)\n",
-        "\n",
-        "  # Decay from initial_learning_rate down to 0 across the whole training run\n",
-        "  linear_decay = tf.keras.optimizers.schedules.PolynomialDecay(\n",
-        "      initial_learning_rate=initial_learning_rate,\n",
-        "      end_learning_rate=0,\n",
-        "      decay_steps=num_train_steps)\n",
-        "\n",
-        "  # Warmup starts at 0 and hands over to the decay schedule after warmup_steps\n",
-        "  warmup_schedule = tfm.optimization.lr_schedule.LinearWarmup(\n",
-        "      warmup_learning_rate = 0,\n",
-        "      after_warmup_lr_sched = linear_decay,\n",
-        "      warmup_steps = warmup_steps\n",
-        "  )\n",
-        "  return warmup_schedule\n"
-      ],
-      "metadata": {
-        "id": "0sob0lWav8xV"
-      },
-      "execution_count": 9,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Re-use our previous best max_length of 384\n",
-        "max_lengths = [384]\n",
-        "# Learning rates from the RoBERTa paper\n",
-        "learning_rates = [1e-5, 2e-5, 3e-5]\n",
-        "for initial_learning_rate in learning_rates:\n",
-        "  # One schedule per learning rate, reused for every max_length below\n",
-        "  warmup_schedule = create_learning_schedule(initial_learning_rate)\n",
-        "  for length in max_lengths:\n",
-        "    # The saved model name encodes both the sequence length and the learning rate\n",
-        "    model_name = 'roberta_model_{length}_lr-{learning_rate}'.format(\n",
-        "        length=length, learning_rate=initial_learning_rate)\n",
-        "    run_roberta(\n",
-        "        length, roberta_tokenizer, model_name,\n",
-        "        x_train=x_train_bilal, y_train=y_train_bilal,\n",
-        "        x_valid=x_valid_bilal, y_valid=y_valid_bilal,\n",
-        "        x_test=x_test_bilal, y_test=y_test_bilal,\n",
-        "        # A fresh Adam optimizer for each run, driven by the schedule above\n",
-        "        optimizer=tf.keras.optimizers.experimental.Adam(learning_rate=warmup_schedule))"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "Hf3eu-8LyrNW",
-        "outputId": "94bb79cd-07f6-465c-d9ce-21f059960bf5"
-      },
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "This model will be saved as roberta_model_384_lr-1e-05\n",
-            "Running roBERTa for encoding max_length: 384\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 384)\n",
-            "Created encoding for validation data with shape (5893, 384)\n",
-            "Created encoding for test data with shape (5894, 384)\n",
-            "Using custom optimizer\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "1474/1474 [==============================] - 929s 626ms/step - loss: 0.6733 - accuracy: 0.6328 - val_loss: 0.6595 - val_accuracy: 0.7534\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 920s 624ms/step - loss: 0.6401 - accuracy: 0.7245 - val_loss: 0.6346 - val_accuracy: 0.7567\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 920s 624ms/step - loss: 0.6261 - accuracy: 0.7354 - val_loss: 0.6213 - val_accuracy: 0.7616\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 920s 624ms/step - loss: 0.6159 - accuracy: 0.7409 - val_loss: 0.6170 - val_accuracy: 0.7614\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 92s 497ms/step - loss: 0.6213 - accuracy: 0.7603\n",
-            "Test loss: 0.6212723851203918\n",
-            "Test accuracy: 0.7602646946907043\n",
-            "185/185 [==============================] - 94s 492ms/step\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.76      0.77      0.77      2992\n",
-            "           1       0.76      0.75      0.75      2902\n",
-            "\n",
-            "    accuracy                           0.76      5894\n",
-            "   macro avg       0.76      0.76      0.76      5894\n",
-            "weighted avg       0.76      0.76      0.76      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_384_lr-1e-05/assets\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_384_lr-1e-05/assets\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "This model will be saved as roberta_model_384_lr-2e-05\n",
-            "Running roBERTa for encoding max_length: 384\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 384)\n",
-            "Created encoding for validation data with shape (5893, 384)\n",
-            "Created encoding for test data with shape (5894, 384)\n",
-            "Using custom optimizer\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "1474/1474 [==============================] - 929s 626ms/step - loss: 0.6616 - accuracy: 0.6547 - val_loss: 0.6339 - val_accuracy: 0.7522\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 920s 624ms/step - loss: 0.6094 - accuracy: 0.7393 - val_loss: 0.5894 - val_accuracy: 0.7638\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 920s 624ms/step - loss: 0.5862 - accuracy: 0.7479 - val_loss: 0.5689 - val_accuracy: 0.7657\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 920s 624ms/step - loss: 0.5730 - accuracy: 0.7504 - val_loss: 0.5629 - val_accuracy: 0.7679\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 92s 497ms/step - loss: 0.5704 - accuracy: 0.7615\n",
-            "Test loss: 0.5704007148742676\n",
-            "Test accuracy: 0.761452317237854\n",
-            "185/185 [==============================] - 94s 493ms/step\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.76      0.77      0.77      2992\n",
-            "           1       0.76      0.76      0.76      2902\n",
-            "\n",
-            "    accuracy                           0.76      5894\n",
-            "   macro avg       0.76      0.76      0.76      5894\n",
-            "weighted avg       0.76      0.76      0.76      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_384_lr-2e-05/assets\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_384_lr-2e-05/assets\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "This model will be saved as roberta_model_384_lr-3e-05\n",
-            "Running roBERTa for encoding max_length: 384\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 384)\n",
-            "Created encoding for validation data with shape (5893, 384)\n",
-            "Created encoding for test data with shape (5894, 384)\n",
-            "Using custom optimizer\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "1474/1474 [==============================] - 930s 626ms/step - loss: 0.6487 - accuracy: 0.6821 - val_loss: 0.6062 - val_accuracy: 0.7575\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 921s 625ms/step - loss: 0.5841 - accuracy: 0.7453 - val_loss: 0.5541 - val_accuracy: 0.7675\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 921s 625ms/step - loss: 0.5601 - accuracy: 0.7537 - val_loss: 0.5349 - val_accuracy: 0.7689\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 921s 625ms/step - loss: 0.5526 - accuracy: 0.7544 - val_loss: 0.5305 - val_accuracy: 0.7709\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 92s 497ms/step - loss: 0.5402 - accuracy: 0.7667\n",
-            "Test loss: 0.5402488708496094\n",
-            "Test accuracy: 0.7667118906974792\n",
-            "185/185 [==============================] - 94s 492ms/step\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.76      0.78      0.77      2992\n",
-            "           1       0.77      0.75      0.76      2902\n",
-            "\n",
-            "    accuracy                           0.77      5894\n",
-            "   macro avg       0.77      0.77      0.77      5894\n",
-            "weighted avg       0.77      0.77      0.77      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_384_lr-3e-05/assets\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_384_lr-3e-05/assets\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "## Extra-large data set\n",
-        "Train the best RoBERTa configuration on a larger data set (roughly 600k reviews in total) to see whether the extra data brings a meaningful improvement."
-      ],
-      "metadata": {
-        "id": "_aUphf_CFU1K"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Load the large Yelp train / test / validation CSV splits from Drive,\n",
-        "# then unpack each split into its review texts (list) and labels (array).\n",
-        "train = pd.read_csv(\"/content/drive/My Drive/Colab Notebooks/W266 Project/data/yelp_train_large.csv\")\n",
-        "test = pd.read_csv(\"/content/drive/My Drive/Colab Notebooks/W266 Project/data/yelp_test_large.csv\")\n",
-        "valid = pd.read_csv(\"/content/drive/My Drive/Colab Notebooks/W266 Project/data/yelp_valid_large.csv\")\n",
-        "\n",
-        "x_train, y_train = list(train[\"text\"]), np.asarray(train[\"label\"])\n",
-        "x_test, y_test = list(test[\"text\"]), np.asarray(test[\"label\"])\n",
-        "x_valid, y_valid = list(valid[\"text\"]), np.asarray(valid[\"label\"])\n",
-        "\n",
-        "# Show the number of training examples as the cell output.\n",
-        "len(x_train)"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "hOWQ9sr0-ldC",
-        "outputId": "159152df-2fe1-4a6d-855f-95803e169574"
-      },
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "471465"
-            ]
-          },
-          "metadata": {},
-          "execution_count": 9
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Build the model name from the encoding length so that every entry in\n",
-        "# max_lengths gets its own name instead of all sharing 'roberta_384_large'.\n",
-        "max_lengths = [384]\n",
-        "for length in max_lengths:\n",
-        "  run_roberta(length, roberta_tokenizer, f'roberta_{length}_large')"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 835,
-          "referenced_widgets": [
-            "ff91ad40e0cb44bcae67dd2eb9ff7dbe",
-            "69a8b638873d484694629efb8b988e93",
-            "e92cd265403c4417b787742f8fe2756a",
-            "e1c4ac149c324ceeae62d0528b1dd025",
-            "a78d6c7b85ba44e78160f5d054ad1288",
-            "738de4b5332942749435e1539cd90098",
-            "c174dbb3fbfd426687040e83e9f12421",
-            "b21b352052da4de8af45fcfabd237302",
-            "8ca96f2c389f45d58d8b60bb34765bb3",
-            "447b0a4d2c474f7e9777a23e72fc8d92",
-            "2900ac0595cc4a7fa79b4d95374c94f2"
-          ]
-        },
-        "id": "K133rKHA2JLu",
-        "outputId": "5231c8d5-2170-4b69-d2a1-6e90540af18d"
-      },
-      "execution_count": null,
-      "outputs": [
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "This model will be saved as roberta_384_large\n",
-            "Running roBERTa for encoding max_length: 384\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (471465, 384)\n",
-            "Created encoding for validation data with shape (58933, 384)\n",
-            "Created encoding for test data with shape (58934, 384)\n"
-          ]
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "ff91ad40e0cb44bcae67dd2eb9ff7dbe",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "Downloading:   0%|          | 0.00/627M [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "14734/14734 [==============================] - 9203s 624ms/step - loss: 0.5470 - accuracy: 0.7520 - val_loss: 0.4895 - val_accuracy: 0.7760\n",
-            "Epoch 2/4\n",
-            "14734/14734 [==============================] - 9201s 624ms/step - loss: 0.5098 - accuracy: 0.7652 - val_loss: 0.4803 - val_accuracy: 0.7806\n",
-            "Epoch 3/4\n",
-            "14734/14734 [==============================] - 9202s 625ms/step - loss: 0.5031 - accuracy: 0.7667 - val_loss: 0.4766 - val_accuracy: 0.7807\n",
-            "Epoch 4/4\n",
-            "14734/14734 [==============================] - 9207s 625ms/step - loss: 0.5001 - accuracy: 0.7677 - val_loss: 0.4753 - val_accuracy: 0.7819\n",
-            "Evaluating model...\n",
-            "1842/1842 [==============================] - 921s 500ms/step - loss: 0.4713 - accuracy: 0.7852\n",
-            "Test loss: 0.4712928533554077\n",
-            "Test accuracy: 0.7852004170417786\n",
-            "1842/1842 [==============================] - 915s 495ms/step\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.78      0.80      0.79     29388\n",
-            "           1       0.79      0.77      0.78     29546\n",
-            "\n",
-            "    accuracy                           0.79     58934\n",
-            "   macro avg       0.79      0.79      0.79     58934\n",
-            "weighted avg       0.79      0.79      0.79     58934\n",
-            "\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_384_large/assets\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_384_large/assets\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "This run does improve accuracy (0.771 -> 0.785), but training takes a very long time (~10 hours for the 4 epochs)."
-      ],
-      "metadata": {
-        "id": "g48tclwPFhPF"
-      }
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "## Error analysis\n",
-        "This section performs error analysis on our best model so far (RoBERTa with a max_length of 384 and a static learning rate)."
-      ],
-      "metadata": {
-        "id": "VeBACPNaFoHr"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Restore the previous best model for error analysis: create the model object\n",
-        "# first, then load the weights that were saved to Drive for the 384 run.\n",
-        "best_weights_path = '/content/drive/My Drive/models/Project W266/roberta_model_384'\n",
-        "saved_model = create_roberta_model()\n",
-        "saved_model.load_weights(best_weights_path)"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 138,
-          "referenced_widgets": [
-            "8cade04bb95e44ed904539d741e64efe",
-            "73c292b4f50744e891eaae1d8d11a02c",
-            "fb5b3017503245f48f89d2bc4fe5fe0a",
-            "985a9b84851540f68cd8775b1bcf36c1",
-            "623b933656844b3c83ab1c4b1d130b16",
-            "4a2b38ab27584e3a97c1ccd66942e900",
-            "9e9f4becd9fe47638fc7883e83bf928d",
-            "1433fa72a097434180d2cb1927bf051a",
-            "b4a5e8c6a241479c9049c4b25a270f09",
-            "0995efa63b404fd49898c5318cf6379d",
-            "efd46accb7a348b0b619aef9d944d53f"
-          ]
-        },
-        "id": "xuZdJxGpD2sS",
-        "outputId": "57bc0274-bcd0-44e5-c008-659d78c32a84"
-      },
-      "execution_count": 9,
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "Downloading:   0%|          | 0.00/627M [00:00<?, ?B/s]"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "8cade04bb95e44ed904539d741e64efe"
-            }
-          },
-          "metadata": {}
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fa742ff13d0>"
-            ]
-          },
-          "metadata": {},
-          "execution_count": 9
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Tokenize the test reviews for RoBERTa (encoded width 384, see the printed shape).\n",
-        "test_encodings_roberta = tokenize(384, x_test, roberta_tokenizer)\n",
-        "encoded_shape = test_encodings_roberta.input_ids.shape\n",
-        "print(f'Created encoding for test data with shape {encoded_shape}')\n",
-        "\n",
-        "# evaluate() and predict() are both fed the same [input_ids, attention_mask] pair.\n",
-        "roberta_test_inputs = [\n",
-        "    test_encodings_roberta.input_ids,\n",
-        "    test_encodings_roberta.attention_mask,\n",
-        "]\n",
-        "\n",
-        "print('Evaluating model...')\n",
-        "score = saved_model.evaluate(roberta_test_inputs, y_test)\n",
-        "\n",
-        "# score is indexed as [loss, accuracy] below.\n",
-        "print(\"Test loss:\", score[0])\n",
-        "print(\"Test accuracy:\", score[1])\n",
-        "\n",
-        "predictions = saved_model.predict(roberta_test_inputs)\n",
-        "# Take the first element of the output tuple (presumably the logits - TODO confirm)\n",
-        "# and pick the highest-scoring class per review.\n",
-        "test_logits = predictions.to_tuple()[0]\n",
-        "preds = test_logits.argmax(axis=1)\n",
-        "print('\\n Classification Report:\\n')\n",
-        "print(classification_report(y_test, preds))"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "7M3_37jwrrJN",
-        "outputId": "757f850c-28bd-44e3-9a8d-e708a22194ff"
-      },
-      "execution_count": 10,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Created encoding for test data with shape (5894, 384)\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 98s 496ms/step - loss: 0.5705 - accuracy: 0.7613\n",
-            "Test loss: 0.5705196261405945\n",
-            "Test accuracy: 0.7612826824188232\n",
-            "185/185 [==============================] - 94s 492ms/step\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.77      0.75      0.76      2992\n",
-            "           1       0.75      0.77      0.76      2902\n",
-            "\n",
-            "    accuracy                           0.76      5894\n",
-            "   macro avg       0.76      0.76      0.76      5894\n",
-            "weighted avg       0.76      0.76      0.76      5894\n",
-            "\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# One row per test review: its text, gold label, predicted label and len() of the text.\n",
-        "test_results = pd.DataFrame({'text': x_test, 'label': y_test, 'pred': preds})\n",
-        "test_results['text_length'] = test_results.text.map(len)\n",
-        "\n",
-        "# Split the rows into the four confusion-matrix quadrants (class 1 is the positive class).\n",
-        "actual_pos, actual_neg = test_results.label == 1, test_results.label == 0\n",
-        "pred_pos, pred_neg = test_results.pred == 1, test_results.pred == 0\n",
-        "TP = test_results[actual_pos & pred_pos]\n",
-        "FP = test_results[actual_neg & pred_pos]\n",
-        "TN = test_results[actual_neg & pred_neg]\n",
-        "FN = test_results[actual_pos & pred_neg]\n",
-        "\n",
-        "for quadrant_name, quadrant_rows in (('TP', TP), ('FP', FP), ('TN', TN), ('FN', FN)):\n",
-        "    print(f'{quadrant_name} size: {len(quadrant_rows)}')"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "XvDiZ6jsF3a1",
-        "outputId": "6ebf4246-3e62-4ebf-d086-0045b897645a"
-      },
-      "execution_count": 41,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "TP size: 2240\n",
-            "FP size: 745\n",
-            "TN size: 2247\n",
-            "FN size: 662\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Mean text_length per confusion-matrix quadrant, rounded to one decimal place.\n",
-        "for quadrant_name, quadrant_rows in (('TP', TP), ('FP', FP), ('TN', TN), ('FN', FN)):\n",
-        "    mean_length = round(quadrant_rows.text_length.mean(), 1)\n",
-        "    print(f'Average review length for {quadrant_name}: {mean_length}')"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "hVkN3tGENGRS",
-        "outputId": "97050e91-575f-4268-ca69-0f07b73187d9"
-      },
-      "execution_count": 44,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Average review length for TP: 1286.2\n",
-            "Average review length for FP: 881.8\n",
-            "Average review length for TN: 284.4\n",
-            "Average review length for FN: 407.3\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Overlay the review-length distributions of true positives and false negatives.\n",
-        "fig, ax = plt.subplots(figsize=(8, 5))\n",
-        "\n",
-        "# Share one bin range across both series: with bins=40 alone, each hist() call derives\n",
-        "# its edges from its own data, so the two overlays would get different bin widths and\n",
-        "# their bar heights could not be compared.\n",
-        "tp_fn_lengths = pd.concat([TP.text_length, FN.text_length])\n",
-        "shared_range = (tp_fn_lengths.min(), tp_fn_lengths.max())\n",
-        "\n",
-        "ax.hist(TP.text_length, bins=40, range=shared_range, color='g', alpha=0.6, label=\"True positives\")\n",
-        "ax.hist(FN.text_length, bins=40, range=shared_range, color='r', alpha=0.6, label=\"False negatives\")\n",
-        "ax.set_title(\"Review length: true positives vs. false negatives\")\n",
-        "ax.set_xlabel(\"Length of Review\")\n",
-        "ax.set_ylabel(\"Number of Reviews\")\n",
-        "ax.legend()\n",
-        "plt.show()"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 334
-        },
-        "id": "yiTltlKgOOHd",
-        "outputId": "81bfe810-9eb2-4832-dc00-4b7a123cbff0"
-      },
-      "execution_count": 70,
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "<Figure size 576x360 with 1 Axes>"
-            ],
-            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfQAAAE9CAYAAAD9MZD2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de7hVVbn48e8rkpCYIuAlUVEfjBBwhxu8UIbyKGWSF0TwxzEwFTymZhZmdSpTO1mesvR4AktDT6QoeddS86CYpXIRAfGGigmhIBaKqdzG74812W5xX9ba7LXXZu7v53nWs+Ycc6453zV0864x5phzREoJSZK0Zduq0gFIkqTNZ0KXJCkHTOiSJOWACV2SpBwwoUuSlAMmdEmScmDrSgewObp27Zp69OhR6TAkSWoxs2fPfj2l1G3T8i06offo0YNZs2ZVOgxJklpMRLxcV7ld7pIk5YAJXZKkHDChS5KUA1v0NXRJUt3Wrl3LkiVLePfddysdipqoQ4cOdO/enfbt2xe1vwldknJoyZIlbLfddvTo0YOIqHQ4KlFKiZUrV7JkyRL22muvoj5jl7sk5dC7775Lly5dTOZbqIigS5cuJfWwmNAlKadM5lu2Uv/7mdAlSc1u5cqVVFVVUVVVxS677MJuu+1Ws75mzZpKh1dj1qxZnHPOOQA8+OCD/OUvf6nZNnHiRK6//vpKhVYyr6FLUhsw/s7xzXq8ScMmNbi9S5cuzJ07F4ALL7yQTp068Y1vfKNm+7p169h668qnoOrqaqqrq4FCQu/UqROHHHIIAGeccUYlQyuZLXRJUosYO3YsZ5xxBgceeCDnn38+F154If/1X/9Vs71Pnz4sXrwYgN/+9rcMHDiQqqoqxo8fz/r16z90vB49enD++efTt29fBg4cyKJFiwBYvHgxhx9+OP369WPIkCH87W9/A+Dmm2+mT58+7L///hx66KFAIYkfffTRLF68mIkTJ3L55ZdTVVXFww8/XBPfM888w8CBA2vOu3jxYvr27QvA7Nmz+exnP8sBBxzA0KFDWbZsGQBXXHEFvXv3pl+/fowaNar5K7MOJnRJUotZsmQJf/nLX/jZz35W7z5PP/00U6dO5ZFHHmHu3Lm0a9eOKVOm1Lnv9ttvz/z58znrrLM499xzATj77LMZM2YM8+bNY/To0TVd6hdddBH33nsvTz75JHfccccHjtOjRw/OOOMMvva1rzF37lw+85nP1Gzr1asXa9as4aWXXgJg6tSpjBw5krVr13L22Wczbdo0Zs+ezZe//GW+853vAHDppZfyxBNPMG/ePCZOnNj0CitB5fs71KoU2y3XWHebJNVlxIgRtGvXrsF9HnjgAWbPns2AAQMAeOedd9hpp53q3Pekk06qef/a174GwF//+lduueUWAE4++WTOP/98AAYNGsTYsWM58cQTOf7440uK+8QTT2Tq1KlccMEFTJ06lalTp/Lss8+yYMECjjjiCADWr1/PrrvuCkC/fv0YPXo0xx57LMcee2xJ52oqE7okqcVsu+22Nctbb701GzZsqFnfeItWSokxY8bwox/9qNHj1R4J3tio8IkTJ/LYY49x9913c8ABBzB79uyi4x45ciQjRozg+OOPJyLo2bMn8+fPZ7/99uOvf/3rh/a/++67mTFjBnfeeSc//OEPmT9/ftnHDNjlLkmqiB49ejBnzhwA5syZU9OlPWTIEKZNm8by5csBeOONN3j55TonGGPq1Kk17wcffDAAhxxyCDfeeCMAU6ZMqek+f+GFFzjwwAO56KKL6NatG6+88soHjrXddtvx1ltv1XmeffbZh3bt2nHxxRczcuRIAD7xiU+wYsWKmoS+du1annrqKTZs2MArr7zCYYcdxo9//GNWrVrF6tWrm1ZJJbCFLkmqiOHDh3P99dez3377ceCBB7LvvvsC0Lt3by655BKOPPJINmzYQ
Pv27bnqqqvYc889P3SMf/zjH/Tr149tttmGG264AYArr7ySU045hcsuu4xu3brxm9/8BoAJEybw/PPPk1JiyJAh7L///jz00EM1xxo2bBgnnHACt99+O1deeeWHzjVy5EgmTJhQ88PjIx/5CNOmTeOcc85h1apVrFu3jnPPPZd9992Xf/u3f2PVqlWklDjnnHPYYYcdmr3+NhUppbKfpFyqq6uT86E3L6+hS/nw9NNP88lPfrLSYZRVjx49mDVrFl27dq10KGVT13/HiJidUqredF+73CVJygG73CVJW6SN96yrwBa6JEk5YEKXJCkHTOiSJOWA19DVJI6Gl6TWxRa6JKks2rVrVzNlalVVVYOD2Dp16tRygW2GyZMn8/e//71m/bTTTmPhwoUVjOh9ttAlqS0Y37zTpzKp8d63jh071kyhmheTJ0+mT58+fPzjHwfg17/+dYUjep8tdElSi1i9ejVDhgyhf//+9O3bl9tvv/1D+yxbtoxDDz2Uqqoq+vTpw8MPPwzAfffdx8EHH0z//v0ZMWJEnY9SHTx4MN/85jcZOHAg++67b81n169fz4QJExgwYAD9+vVjUvZjZMOGDZx55pn06tWLI444gqOOOopp06YBhZnZBgwYQJ8+fRg3bhwpJaZNm8asWbMYPXo0VVVVvPPOOwwePJhZs2YxceJEJkyYUBPL5MmTOeuss4C6p4Jdv349Y8eOpU+fPvTt25fLL798s+vXhC5JKot33nmnprv9uOOOo0OHDtx6663MmTOH6dOn8/Wvf51Nn1b6u9/9jqFDhzJ37lyefPJJqqqqeP3117nkkkv405/+xJw5c6iurq53+tV169bx+OOP8/Of/5wf/OAHAFxzzTVsv/32zJw5k5kzZ/KrX/2Kl156iVtuuYXFixezcOFC/vd///cDk6ycddZZzJw5kwULFvDOO+9w1113ccIJJ1BdXc2UKVOYO3cuHTt2rNl/+PDh3HrrrTXrU6dOZdSoUfVOBTt37lyWLl3KggULmD9/Pqeccspm17dd7pKksti0y33t2rV8+9vfZsaMGWy11VYsXbqU1157jV122aVmnwEDBvDlL3+ZtWvXcuyxx1JVVcVDDz3EwoULGTRoEABr1qypmYhlUxunRT3ggANqrtnfd999zJs3r6b1vWrVKp5//nn+/Oc/M2LECLbaait22WUXDjvssJrjTJ8+nZ/85Cf861//4o033mC//fZj2LBh9X7Xbt26sffee/Poo4/Ss2dPnnnmGQYNGsRVV11V51Sww4YN48UXX+Tss8/mC1/4AkceeWQTaviDTOhtRLGj0iWpXKZMmcKKFSuYPXs27du3p0ePHjVTpm506KGHMmPGDO6++27Gjh3LeeedR+fOnTniiCNqJl9pyDbbbAMUBuStW7cOKEzHeuWVVzJ06NAP7HvPPffUeYx3332XM888k1mzZrH77rtz4YUXfijOuowaNYqbbrqJXr16cdxxxxERDU4F++STT3LvvfcyceJEbrrpJq699tpGz9EQu9wlSS1i1apV7LTTTrRv357p06fXOSXqyy+/zM4778zpp5/Oaaedxpw5czjooIN45JFHWLRoEQBvv/02zz33XNHnHTp0KL/85S9Zu3YtAM899xxvv/02gwYN4ve//z0bNmzgtdde48EHHwTen5e9a9eurF69uqZlDw1PsXrcccdx++23c8MNNzBq1Cig/qlgX3/9dTZs2MDw4cO55JJLaqaR3Ry20CVJLWL06NEMGzaMvn37Ul1dTa9evT60z4MPPshll11G+/bt6dSpE9dffz3dunVj8uTJnHTSSbz33nsAXHLJJTXTrTbmtNNOY/HixfTv35+UEt26deO2225j+PDhPPDAA/Tu3Zvdd9+d/v37s/3227PDDjtw+umn06dPH3bZZZea7nKAsWPHcsYZZ9CxY8cPXHMH6Ny5M5/85CdZuHAhAwcOBOqfCrZjx46ccsopbNiwAaDOFnypnD61jahUl
7sPlpEqoy1Mn9ocVq9eTadOnVi5ciUDBw7kkUce+cA1/UorZfpUW+iSpDbr6KOP5p///Cdr1qzhu9/9bqtK5qUyoUuS2qyN183zwIS+hXP0uiQJyjjKPSJ2j4jpEbEwIp6KiK9m5TtGxP0R8Xz23jkrj4i4IiIWRcS8iOhfrtgkqS3YksdIqfT/fuW8bW0d8PWUUm/gIOArEdEbuAB4IKXUE3ggWwf4PNAze40DflnG2CQp1zp06MDKlStN6luolBIrV66kQ4cORX+mbF3uKaVlwLJs+a2IeBrYDTgGGJztdh3wIPDNrPz6VPi/79GI2CEids2OI0kqQffu3VmyZAkrVqyodChqog4dOtC9e/ei92+Ra+gR0QP4FPAYsHOtJP0qsHO2vBvwSq2PLcnKTOiSVKL27duz1157VToMtaCyPykuIjoBvwfOTSm9WXtb1hovqT8oIsZFxKyImOUvT0mSCsqa0COiPYVkPiWldEtW/FpE7Jpt3xVYnpUvBXav9fHuWdkHpJSuTilVp5Squ3XrVr7gJUnagpRzlHsA1wBPp5Rqz3N3BzAmWx4D3F6r/EvZaPeDgFVeP5ckqTjlvIY+CDgZmB8RG+fP+zZwKXBTRJwKvAycmG27BzgKWAT8C9j8yWElSWojyjnK/c9A1LN5SB37J+Ar5YpHkqQ8c/pUSZJywIQuSVIOmNAlScoBE7okSTlgQpckKQdM6JIk5YAJXZKkHDChS5KUAyZ0SZJywIQuSVIOmNAlScoBE7okSTlgQpckKQdM6JIk5YAJXZKkHDChS5KUAyZ0SZJywIQuSVIOmNAlScoBE7okSTlgQpckKQdM6JIk5YAJXZKkHNi60gEo38bfOb6o/SYNm1TmSCQp32yhS5KUAyZ0SZJywIQuSVIOmNAlScoBE7okSTlgQpckKQdM6JIk5YAJXZKkHDChS5KUAyZ0SZJywIQuSVIOmNAlScoBE7okSTlgQpckKQdM6JIk5YAJXZKkHDChS5KUAyZ0SZJywIQuSVIOlJTQI2KriPhYuYKRJElN02hCj4jfRcTHImJbYAGwMCImlD80SZJUrGJa6L1TSm8CxwJ/APYCTi5rVJIkqSTFJPT2EdGeQkK/I6W0FkjlDUuSJJWimIQ+CVgMbAvMiIg9gTfLGZQkSSpNowk9pXRFSmm3lNJRKaUE/A04rPyhSZKkYhUzKO6FiJgSEWdExH6pYF0Rn7s2IpZHxIJaZRdGxNKImJu9jqq17VsRsSgino2IoU3/SpIktT1FDYqj0O3eBbgsS/C3FvG5ycDn6ii/PKVUlb3uAYiI3sAoYL/sM/8TEe2K+QKSJKm4hL4eWJu9bwCWZ68GpZRmAG8UGccxwI0ppfdSSi8Bi4CBRX5WkqQ2r5iE/ibwc+AlYExK6eCU0vjNOOdZETEv65LvnJXtBrxSa58lWZkkSSpCMQn9JGAGcCZwY0T8ICKGNPF8vwT2AaqAZcBPSz1ARIyLiFkRMWvFihVNDEOSpHwpZpT77SmlCcB44B5gLHBXU06WUnotpbQ+pbQB+BXvd6svBXavtWv3rKyuY1ydUqpOKVV369atKWFIkpQ7xYxy/31ELAJ+AXwU+BLQueFP1XusXWutHkfhUbIAdwCjImKbiNgL6Ak83pRzSJLUFm1dxD4/Ap5IKa0v5cARcQMwGOgaEUuA7wODI6KKwpPmFlNo9ZNSeioibgIWAuuAr5R6PkmS2rJiEvpC4FsRsUdKaVxE9AQ+kVJqsNs9pXRSHcXXNLD/D4EfFhGPJEnaRDEJ/TfAbOCQbH0pcDNNvI6u4oy/c3NuJJAktTXFjHLfJ6X0Ewr3opNS+hcQZY1KkiSVpJiEviYiOpLNsBYR+wDvlTUqSZJUkmK63L8P/BHYPSKmAIMo3LomSZJaiUYTekrp/oiYAxxEoav9qyml18semSRJKlq9Xe4R0St77w/sSeHJbn8H9sjKJElSK9FQC/08YBx1P541AYeXJSJJklSyehN6S
mlc9n5Yy4UjSZKaophHv86LiG9lo9slSVIrVMxta8MozIV+U0TMjIhvRMQeZY5LkiSVoJjZ1l5OKf0kpXQA8P+AfhTmRpckSa1EMfehExF7AiOz13rg/HIGJUmSStNoQo+Ix4D2FJ7fPiKl9GLZo5IkSSUppoX+pZTSs2WPRG1asZPRTBo2qcyRSNKWqZhBcf+MiGsi4g8AEdE7Ik4tc1ySJKkExST0ycC9wMez9eeAc8sVkCRJKl0xCb1rSukmYANASmkdhYFxkiSplSgmob8dEV14f/rUg4BVZY1KkiSVpJhBcecBdwD7RMQjQDfghLJGJUmSSlLM9KlzIuKzwCcoTJ/6LDCw3IFJkqTi1ZvQI6IdcCKwG/CHlNJTEXE0cDXQEfhUy4QoSZIa01AL/Rpgd+Bx4MqI+DtwAPCtlNJtLRGcJEkqTkMJvRrol1LaEBEdgFeBfVJKK1smNEmSVKyGRrmvSSltvFXtXeBFk7kkSa1TQy30XhExL1sOCqPc52XLKaXUr+zRSZKkojSU0D/ZYlFIkqTNUm9CTym93JKBSJKkpivmSXGSJKmVM6FLkpQD9Sb0iHgge/9xy4UjSZKaoqFBcbtGxCHAFyPiRgqj22uklOaUNTKpDuPvHF/UfpOGTSpzJJLUujSU0L8HfBfoDvxsk20JOLxcQUmSpNI0NMp9GjAtIr6bUrq4BWOSJEklKma2tYsj4ovAoVnRgymlu8obliRJKkWjo9wj4kfAV4GF2eurEfGf5Q5MkiQVr9EWOvAFoGrjc90j4jrgCeDb5QxMkiQVr9j70Heotbx9OQKRJElNV0wL/UfAExExncKta4cCF5Q1KkmSVJJiBsXdEBEPAgOyom+mlF4ta1SSJKkkxbTQSSktA+4ocyySJKmJfJa7JEk5YEKXJCkHGkzoEdEuIp5pqWAkSVLTNJjQU0rrgWcjYo8WikeSJDVBMYPiOgNPRcTjwNsbC1NKXyxbVJIkqSTFJPTvlj0KSZK0WYq5D/2hiNgT6JlS+lNEfBRoV/7QJElSsRpN6BFxOjAO2BHYB9gNmAgMKW9oUtONv3N8UftNGjapzJFIUsso5ra1rwCDgDcBUkrPAzuVMyhJklSaYhL6eymlNRtXImJrIDX2oYi4NiKWR8SCWmU7RsT9EfF89t45K4+IuCIiFkXEvIjo35QvI0lSW1VMQn8oIr4NdIyII4CbgTuL+Nxk4HOblF0APJBS6gk8wPuTvHwe6Jm9xgG/LOL4kiQpU0xCvwBYAcwHxgP3AP/R2IdSSjOANzYpPga4Llu+Dji2Vvn1qeBRYIeI2LWI2CRJEsWNct8QEdcBj1Hoan82pdRol3s9ds4megF4Fdg5W94NeKXWfkuysmVIkqRGNdpCj4gvAC8AVwD/DSyKiM9v7omzHwUl/zCIiHERMSsiZq1YsWJzw5AkKReK6XL/KXBYSmlwSumzwGHA5U0832sbu9Kz9+VZ+VJg91r7dc/KPiSldHVKqTqlVN2tW7cmhiFJUr4Uk9DfSiktqrX+IvBWE893BzAmWx4D3F6r/EvZaPeDgFW1uuYlSVIj6r2GHhHHZ4uzIuIe4CYKXeQjgJmNHTgibgAGA10jYgnwfeBS4KaIOBV4GTgx2/0e4ChgEfAv4JSmfBlJktqqhgbFDau1/Brw2Wx5BdCxsQOnlE6qZ9OHnjCXXU//SmPHzINin2AmSVIp6k3oKSVbyZIkbSGKeZb7XsDZQI/a+zt9qiRJrUcx06feBlxD4elwG8objiRJaopiEvq7KaUryh6JJElqsmIS+i8i4vvAfcB7GwtTSnPKFpXUQpxmVVJeFJPQ+wInA4fzfpd7ytYlSVIrUExCHwHsXXsKVUmS1LoU86S4BcAO5Q5EkiQ1XTEt9B2AZyJiJh+8hu5ta5IktRLFJPTvlz0KSZK0WYqZD/2hlghEkiQ1XTFPinuL9+ct/wjQHng7pfSxcgYmSZKKV0wLfbuNyxERwDHAQeUMSpIklaaYU
e41UsFtwNAyxSNJkpqgmC7342utbgVUA++WLSJJklSyYka5154XfR2wmEK3uyRJaiWKuYbuvOiSJLVy9Sb0iPheA59LKaWLyxCPJElqgoZa6G/XUbYtcCrQBTChS5LUStSb0FNKP924HBHbAV8FTgFuBH5a3+ckSVLLa/AaekTsCJwHjAauA/qnlP7REoFJkqTiNXQN/TLgeOBqoG9KaXWLRSVJkkrS0INlvg58HPgP4O8R8Wb2eisi3myZ8CRJUjEauoZe0lPkJElS5Zi0JUnKARO6JEk5YEKXJCkHTOiSJOWACV2SpBwwoUuSlAMmdEmScsCELklSDpjQJUnKARO6JEk5YEKXJCkHGpw+VVJpxt85vqj9Jg2bVOZIJLU1ttAlScoBW+hSEYpteUtSpdhClyQpB2yhSxXgtXZJzc0WuiRJOWBClyQpB0zokiTlgAldkqQcMKFLkpQDJnRJknLAhC5JUg6Y0CVJygETuiRJOVCRJ8VFxGLgLWA9sC6lVB0ROwJTgR7AYuDElNI/KhGfJElbmko++vWwlNLrtdYvAB5IKV0aERdk69+sTGgta/RVM2qWp3zl0ApGIknaUrWmLvdjgOuy5euAYysYiyRJW5RKJfQE3BcRsyNiXFa2c0ppWbb8KrBzZUKTJGnLU6ku90+nlJZGxE7A/RHxTO2NKaUUEamuD2Y/AMYB7LHHHuWPVJKkLUBFEnpKaWn2vjwibgUGAq9FxK4ppWURsSuwvJ7PXg1cDVBdXV1n0m91xr8/VebolwvXy71WLklqTi2e0CNiW2CrlNJb2fKRwEXAHcAY4NLs/faWjq1ZjS9uvmtJkppDJVroOwO3RsTG8/8upfTHiJgJ3BQRpwIvAydWIDZpizT+zuJ+QE4aNqnMkUiqlBZP6CmlF4H96yhfCQxp6XgkScqD1nTbmiRJaiITuiRJOWBClyQpB0zokiTlgAldkqQcqOTkLJIaUeztaJJkC12SpBwwoUuSlAN2uTc3H/kqSaoAW+iSJOWACV2SpBwwoUuSlANeQ28OXjeXJFWYCb1UG5P3pJabhnL0VTNqlqd85dAWO68kacthQm+qzWyV107SkiRtLq+hS5KUAyZ0SZJywIQuSVIOeA29lXEAnCSpKUzoUhtS7Oxtk4a13F0ckpqHCb2ZzHi5+Uet1zUS3ha8JKkuXkOXJCkHTOiSJOWAXe6SPsRr7dKWx4ReDJ/VLklq5exylyQpB0zoW7DRV83wmfCSJMAud0mbwWvtUuthC12SpBwwoUuSlAN2ueeAT4+TJJnQ6+OtapKkLYhd7pIk5YAJXZKkHDChS5KUA15Dl1R23q8ulZ8tdEmScsAWuqRWo9iWPNialzZlQs+Zuu5J9z515ZHd+Kqk1vj/nwldUq6V0uovhj8Q1FqZ0CWpBK2xZSaBCT3XnFpVktoOR7lLkpQDJnRJknLALndJ2gJ47V6NMaG3MfVdV6/rdjZvd5PKr7lH4avtMqE3YsbLbWNg2cbkbeKWtmy25NsuE7okqV7+QNhymND1AY3d6mY3vKTN4Q+E8ml1CT0iPgf8AmgH/DqldGmFQxLe0y7lTVu7dt8Wfki0qoQeEe2Aq4AjgCXAzIi4I6W0sEUCGN+2/gffXI211m3Nqy1rawmzuVUqAW/J/91a233oA4FFKaUXU0prgBuBYyockyRJrV6raqEDuwGv1FpfAhxYoVhUgnKMkq+vhe+IfEn6sEgpVTqGGhFxAvC5lNJp2frJwIEppbNq7TMOGJetfgJ4djNP2xV4fTOPIeuxuViPm886bB7WY/MoRz3umVLqtmlha2uhLwV2r7XePSurkVK6Gri6uU4YEbNSStXNdby2ynpsHtbj5rMOm4f12Dxash5b2zX0mUDPiNgrIj4CjALuqHBMkiS1eq2qhZ5SWhcRZwH3Urht7dqU0lMVDkuSpFavVSV0gJTSPcA9LXjKZuu+b+Osx+ZhPW4+67B5WI/No8XqsVUNipMkSU3T2q6hS
5KkJmizCT0iPhcRz0bEooi4oNLxtDYRcW1ELI+IBbXKdoyI+yPi+ey9c1YeEXFFVpfzIqJ/rc+MyfZ/PiLGVOK7VFJE7B4R0yNiYUQ8FRFfzcqtyxJERIeIeDwinszq8QdZ+V4R8VhWX1OzwbRExDbZ+qJse49ax/pWVv5sRAytzDeqnIhoFxFPRMRd2bp12AQRsTgi5kfE3IiYlZVV9u86pdTmXhQG3L0A7A18BHgS6F3puFrTCzgU6A8sqFX2E+CCbPkC4MfZ8lHAH4AADgIey8p3BF7M3jtny50r/d1auB53Bfpny9sBzwG9rcuS6zGATtlye+CxrH5uAkZl5ROBf8+WzwQmZsujgKnZcu/s730bYK/s34F2lf5+LVyX5wG/A+7K1q3DptXjYqDrJmUV/btuqy10HzHbiJTSDOCNTYqPAa7Llq8Djq1Vfn0qeBTYISJ2BYYC96eU3kgp/QO4H/hc+aNvPVJKy1JKc7Llt4CnKTwR0bosQVYfq7PV9tkrAYcD07LyTetxY/1OA4ZERGTlN6aU3kspvQQsovDvQZsQEd2BLwC/ztYD67A5VfTvuq0m9LoeMbtbhWLZkuycUlqWLb8K7Jwt11ef1nMtWZflpyi0Lq3LEmVdxXOB5RT+4XsB+GdKaV22S+06qamvbPsqoAvW48+B84EN2XoXrMOmSsB9ETE7Ck8whQr/Xbe629a0ZUgppYjwFokiRUQn4PfAuSmlNwsNnQLrsjgppfVAVUTsANwK9KpwSFuUiDgaWJ5Smh0RgysdTw58OqW0NCJ2Au6PiGdqb6zE33VbbaE3+ohZ1em1rJuI7H15Vl5ffVrPQES0p5DMp6SUbsmKrcsmSin9E5gOHEyh63Jjw6R2ndTUV7Z9e2AlbbseBwFfjIjFFC4zHg78AuuwSVJKS7P35RR+YA6kwn/XbTWh+4jZprkD2DgKcwxwe63yL2UjOQ8CVmXdTvcCR0ZE52y055FZWZuRXXO8Bng6pfSzWpusyxJERLesZU5EdASOoDAeYTpwQrbbpvW4sX5PAP4vFUYh3QGMykZw7wX0BB5vmW9RWSmlb6WUuqeUelD4N+//UkqjsQ5LFhHbRsR2G5cp/D0uoNJ/15UeKVipF4VRh89RuA73nUrH0+bZK3IAAARWSURBVNpewA3AMmAthes6p1K4fvYA8DzwJ2DHbN8Arsrqcj5QXes4X6YwaGYRcEqlv1cF6vHTFK61zQPmZq+jrMuS67Ef8ERWjwuA72Xle1NIJouAm4FtsvIO2fqibPvetY71nax+nwU+X+nvVqH6HMz7o9ytw9Lrb28KI/2fBJ7amEMq/Xftk+IkScqBttrlLklSrpjQJUnKARO6JEk5YEKXJCkHTOiSJOWACV1qRSJideN7bdbxz42IjzbH+bL7kP+UzTY1cpNtkyPipWzbkxExpInnqI6IK5oao9SW+OhXqW05F/gt8K9mONanAFJKVfVsn5BSmhYRhwFXU3gASUlSSrOAWU0PUWo7bKFLrVxE7BMRf8wmgXg4Inpl5ZOzOZb/EhEvRsQJWflWEfE/EfFMNifzPRFxQkScA3wcmB4R02sd/4dZK/rRiNi5jvPvGBG3ZfM4PxoR/bLnV/8WGJC1wvdp4Cv8lWzCiWyClcsiYmZ2vPFZ+Y0R8YVa55ycxTw43p+3e9uIuDYK86I/ERHHZOV3R0S/bPmJiPhetnxRRJze9JqXtiwmdKn1uxo4O6V0APAN4H9qbduVwtPojgYuzcqOB3pQmLf6ZArPPCeldAXwd+CwlNJh2b7bAo+mlPYHZgB1JcAfAE+klPoB36YwDeRy4DTg4ZRSVUrphQbi/xxwW7Z8KoXHXg4ABgCnZ48PnQqcCJA9jnkIcPcmx/kOhcePDgQOAy7LHrv5MPCZiNgeWEfhmeUAn8m+k9Qm2OUutWJRmKXtEODmeH+Gtm1q7XJbSmkDsLBW6/rTwM1Z+au1W+N1WAPclS3PpvCM9E19GhgOkFL6v
4joEhEfKyL8yyLiPylMOHFwVnYk0G9jbwKFCT96An8AfhER21D4ATAjpfROre+88bNfjIhvZOsdgD0oJPRzgJco/Ag4IhsnsFdK6dki4pRywYQutW5bUZivur7r1O/VWo569mnI2vT+85/X07z/Jmy8hn42cC1wAIUYz04pfWgCioh4EBgKjKQwG9iHdgGGb5qksxZ9NfAihXnSu1LoaZjdfF9Fav3scpdasZTSm8BLETECCrO3RcT+jXzsEWB4di19ZwoTcWz0FrBdiWE8DIzOzj8YeD2Lq1j/DWwVEUMpzCT171GYUpaI2DfrNodCt/spFLrK/1jHce4Fzs5msCMiNg7KWwO8AoygcL3+YQqXJuxuV5tiQpdal49GxJJar/MoJNNTI2LjzE7HNHKM31OYIW8hhYFrc4BV2bargT820g2/qQuBAyJiHoXr9GMa3v2Dsh6AS4DzgV9ncc2JiAXAJN7vFbgP+CzwpyxJb+pioD0wLyKeytY3ehhYnlJ6J1vunr1LbYazrUk5FBGdUkqrI6ILhakvB6WUXq10XJLKx2voUj7dFRE7AB8BLjaZS/lnC12SpBzwGrokSTlgQpckKQdM6JIk5YAJXZKkHDChS5KUAyZ0SZJy4P8D9AfR/rbPpVUAAAAASUVORK5CYII=\n"
-          },
-          "metadata": {
-            "needs_background": "light"
-          }
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Overlay the review-length distributions of true negatives and false positives.\n",
-        "fig, ax = plt.subplots(figsize=(8, 5))\n",
-        "\n",
-        "# Share one bin range across both series: with bins=40 alone, each hist() call derives\n",
-        "# its edges from its own data, so the two overlays would get different bin widths and\n",
-        "# their bar heights could not be compared.\n",
-        "tn_fp_lengths = pd.concat([TN.text_length, FP.text_length])\n",
-        "shared_range = (tn_fp_lengths.min(), tn_fp_lengths.max())\n",
-        "\n",
-        "ax.hist(TN.text_length, bins=40, range=shared_range, color='g', alpha=0.6, label=\"True negatives\")\n",
-        "ax.hist(FP.text_length, bins=40, range=shared_range, color='r', alpha=0.6, label=\"False positives\")\n",
-        "ax.set_title(\"Review length: true negatives vs. false positives\")\n",
-        "ax.set_xlabel(\"Length of Review\")\n",
-        "ax.set_ylabel(\"Number of Reviews\")\n",
-        "ax.legend()\n",
-        "plt.show()"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 334
-        },
-        "id": "DiDeqh1HObxe",
-        "outputId": "887771c3-11cb-4dcd-dea7-ca3a43b3b91d"
-      },
-      "execution_count": 71,
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "<Figure size 576x360 with 1 Axes>"
-            ],
-            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfQAAAE9CAYAAAD9MZD2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de5RU1Zmw8ecFO6JiEJEQBRXiIjGg2I2NlyESkIl34y2IxkS8xR5zUWNixIyJTtTEGY0xZjTBmTjqikYJjtegQR0QNJ8XQCSKGvFCghdAEgUhKsj+/qjTbYlNd3VDdRWnn99atfqcXafOeWuzFm/tffbZO1JKSJKkjVuXSgcgSZLWnwldkqQcMKFLkpQDJnRJknLAhC5JUg6Y0CVJyoFNKh3A+thmm21S//79Kx2GJEkdZtasWW+klHqvXb5RJ/T+/fszc+bMSochSVKHiYgFzZXb5S5JUg6Y0CVJygETuiRJObBR30OXJJVm1apVLFy4kHfeeafSoahE3bp1o1+/ftTU1JR0vAldkjqBhQsXsuWWW9K/f38iotLhqBUpJZYuXcrChQsZMGBASZ+xy12SOoF33nmHXr16mcw3EhFBr1692tSjYkKXpE7CZL5xaeu/lwldklR2S5cupba2ltraWj75yU/St2/fpv333nuv0uGV7IorrmDlypVN+wcddBBvvvlmBSP6gPfQJakTarirYYOeb8KhE1p8v1evXsyZMweACy64gO7du/Pd73636f3Vq1ezySbVn5KuuOIKvvKVr7D55psDMHny5ApH9AFb6JKkijjhhBP4l3/5F/bcc0++973vccEFF3DZZZc1vb/LLrvw8ssvA/Cb3/yGPfbYg9raWhoaGnj//fc/cr7+/ftz/vnnM3ToUHbddVeeffZZAFasWMFJJ53EHnvsQV1dHXfccQcAK1eu5Oijj2bQoEEcccQR7Lnnnk2zj5522mnU19czePBgzj//fACuvPJKXn31VUaNGsWoUaOarvnGG28wfvx4rrrqqqZYir/LpZdeyrBhwxgyZEjTuVasWMHBBx/Mbrvtxi677MItt9yy3vVpQpckVczChQv54x//yOWXX77OY5555hluueUWHn74YebMmUPXrl258cYbmz12m222Yfbs2Zx22mlNCfXiiy9m33335bHHHmPq1KmcffbZrFixgquvvpqePXsyb948LrzwQmbNmtV0nosvvpiZM2cyd+5cHnzwQebOncvpp5/Odtttx9SpU5k6deqHrjt27FgmTpzYtD9x4kTGjh3LlClTeP7553nssceYM2cOs2bNYvr06dx7771st912PPnkkzz11FMccMAB61ONgAm9JA13NWzw7ilJEowZM4auXbu2eMwDDzzArFmzGDZsGLW1tTzwwAO8+OKLzR575JFHArD77rs3te6nTJnCJZdcQm1tLSNHjuSdd97hL3/5Cw899BDHHHMMUOgNGDJkSNN5Jk6cyNChQ6mrq+Ppp59m3rx5LcZYV1fH4sWLefXVV3nyySfp2bMn22+/PVOmTGHKlCnU1dUxdOhQnn32WZ5//nl23XVX7rvvPs455xxmzJhBjx49Sq2ydar+GxaSpNzaYostmrY32WQT1qxZ07Tf+MhWSolx48bxk5/8pNXzbbrppgB07dqV1atXN33+1ltv5TOf+UxJMb300ktcdtllPP744/Ts2ZMTTjihpMfHxowZw6RJk3j99dcZO3Zs07XPPfdcGho+2iicPXs2kydP5rzzzmP06NH88Ic/LCm+dbGFLkmqCv3792f27NlAIdm99NJLAIwePZpJkyaxePFiAP72t7+xYEGzC441a//99+cXv/gFKSUAnnjiCQCGDx/e1E0+b948/vSnPwGwbNkytthiC3r06MGiRYu45557ms615ZZbsnz58mavM3bsWG6++WYmTZrEmDFjmq597bXX8vbbbwPwy
iuvNLXkN998c77yla9w9tlnN33v9WELXZJUFY466ihuuOEGBg8ezJ577smnP/1pAAYNGsRFF13Efvvtx5o1a6ipqeGqq65ixx13LOm8P/jBDzjzzDMZMmQIa9asYcCAAdx99918/etfZ9y4cQwaNIidd96ZwYMH06NHDwYOHEhdXR0777wz22+/PcOHD28616mnnsoBBxzQdC+92ODBg1m+fDl9+/Zl2223BWC//fbjmWeeYe+99wage/fu/OY3v2H+/PmcffbZdOnShZqaGn75y1+ud/1F4y+WjVF9fX3qiPXQG++ft/ZYhiRVq2eeeYbPfvazlQ6jqrz//vusWrWKbt268cILL/DP//zPPPfcc3zsYx+rdGhNmvt3i4hZKaX6tY+1hS5J6pRWrlzJqFGjWLVqFSklrr766qpK5m1lQpckdUpbbrklHdHL21EcFCdJUg6Y0CVJygETuiRJOWBClyQpB0zokqQO0bVr16YlU2tra5umZm1O9+7dOy6wFrz88svcdNNNTfszZ87k9NNPr2BE6+Yo93Yqntvd59MlbXSamYp0vUxo/f/BzTbbrGkJ1Y1FY0L/8pe/DEB9fT319R95BLwq2EKXJFXE22+/zejRo5uWO21c1rTYa6+9xogRI6itrWWXXXZhxowZQGHBlb333puhQ4cyZsyYpqlVi40cOZIzzjij6bOPPfYYUJg69vDDD2fIkCHstddezJ07F4AHH3ywqfegrq6O5cuXM378eGbMmEFtbS0/+9nPmDZtGocccghr1qyhf//+vPnmm03XGzhwIIsWLWLJkiUcddRRDBs2jGHDhvHwww+v8/wbkgldktQh/vGPfzQltCOOOIJu3bpx2223MXv2bKZOncp3vvMd1p699KabbmL//fdnzpw5PPnkk9TW1vLGG29w0UUXcf/99zN79mzq6+vXufzqypUrmTNnDldffTUnnXQSAOeffz51dXXMnTuXH//4xxx//PEAXHbZZVx11VXMmTOHGTNmsNlmm3HJJZewzz77MGfOHL797W83nbdLly4cdthh3HbbbQA8+uij7LjjjvTp04czzjiDb3/72zz++OPceuutnHLKKes8/4Zkl7skqUOs3eW+atUqvv/97zN9+nS6dOnCK6+8wqJFi/jkJz/ZdMywYcM46aSTWLVqFYcffji1tbU8+OCDzJs3r2mO9ffee69prvS1HXvssQCMGDGCZcuW8eabb/LQQw9x6623ArDvvvuydOlSli1bxvDhwznrrLM47rjjOPLII+nXr1+L32fs2LH86Ec/4sQTT+Tmm29uWmHt/vvv/9Byq8uWLePtt99u8/nbyoQuSaqIG2+8kSVLljBr1ixqamro37//R5YpHTFiBNOnT+f3v/89J5xwAmeddRY9e/bkC1/4Ar/97W9bvUZEtLhfbPz48Rx88MFMnjyZ4cOH84c//KHFc++9997Mnz+fJUuWcPvtt3PeeecBsGbNGh555BG6devW6vl33nnnVr9DqexylyRVxFtvvcUnPvEJampqmDp1arNLoi5YsIA+ffrwta99jVNOOYXZs2ez11578fDDDzN//nwAVqxYwZ///Odmr3HLLbcA8NBDD9GjRw969OjBPvvsw4033gjAtGnT2Gabbfj4xz/OCy+8wK677so555zDsGHDePbZZ1tcLjUiOOKIIzjrrLP47Gc/S69evYDCCmu/+MUvmo5r7JVo7vwbki10SVJFHHfccRx66KHsuuuu1NfXN9tanTZtGpdeeik1NTV0796dG264gd69e3Pddddx7LHH8u677wJw0UUXNS23Wqxbt27U1dWxatUqrr32WgAuuOACTjrpJIYMGcLmm2/O9ddfD8AVV1zB1KlT6dKlC4MHD+bAAw+kS5cudO3ald12240TTjiBurq6D51/7NixDBs2jOuuu66p7Morr+Qb3/gGQ4YMYfXq1YwYMYJf/epXzZ5/Q3L51BI0t3yqj61J2ph0xuVTR44cyWWXX
Va1j5mVoi3Lp9rlLklSDtjlLknKpWnTplU6hA5lC12SpBwwoUtSJ7Exj5nqjNr672VC3wAa7mr40CA5Sao23bp1Y+nSpSb1jURKiaVLl37kWfaWeA9dkjqBfv36sXDhQpYsWVLpUFSibt26tWk2ORO6JHUCNTU1DBgwoNJhqIxM6G1gt7okqVp5D12SpBwoW0KPiO0jYmpEzIuIpyPijKx864i4LyKez/72zMojIq6MiPkRMTcihpYrNkmS8qacLfTVwHdSSoOAvYBvRMQgYDzwQEppIPBAtg9wIDAwe50K/LKMsUmSlCtlS+gppddSSrOz7eXAM0Bf4DDg+uyw64HDs+3DgBtSwSPAVhGxbbnikyQpTzrkHnpE9AfqgEeBPiml17K3Xgf6ZNt9gb8WfWxhViZJklpR9oQeEd2BW4EzU0rLit9LhRkO2jTLQUScGhEzI2Kmz1NKklRQ1oQeETUUkvmNKaX/zYoXNXalZ38XZ+WvANsXfbxfVvYhKaVrUkr1KaX63r17ly94SZI2IuUc5R7Ar4FnUkqXF711JzAu2x4H3FFUfnw22n0v4K2irnlJktSCck4sMxz4KvCniJiTlX0fuASYGBEnAwuAo7P3JgMHAfOBlcCJZYxNkqRcKVtCTyk9BMQ63h7dzPEJ+Ea54pEkKc+cKU6SpBwwoUuSlAMmdEmScsCELklSDpjQJUnKARO6JEk5YEKXJCkHTOiSJOWACV2SpBwwoUuSlAMmdEmScsCELklSDpjQJUnKARO6JEk5YEKXJCkHTOiSJOWACV2SpBwwoUuSlAMmdEmScsCELklSDpjQJUnKARO6JEk5YEKXJCkHTOiSJOWACV2SpBwwoUuSlAMmdEmScsCELklSDpjQJUnKARO6JEk5YEKXJCkHTOiSJOXAJpUOIE8a7mpo2p5w6IQKRiJJ6mxsoUuSlAMmdEmScsCELklSDpjQJUnKgTYl9IjoEhEfL1cwkiSpfVpN6BFxU0R8PCK2AJ4C5kXE2eUPTZIklaqUFvqglNIy4HDgHmAA8NWyRiVJktqklIReExE1FBL6nSmlVUAqb1iSJKktSplYZgLwMvAkMD0idgSWlTOoPHCSGUlSR2q1hZ5SujKl1DeldFBKKQF/AUaVP7T8aLir4UMJXpKkDa3VFnpEvAA8AswAZqSUngZWlzswSZJUupIGxVHodu8FXBoRL0TEbeUNS5IktUUpCf19YFX2dw2wOHtJkqQqUcqguGXAn4DLgf9KKS0tb0iSJKmtSmmhHwtMB74O3BwR/xYRo1v7UERcGxGLI+KporILIuKViJiTvQ4qeu/ciJgfEc9FxP7t+TKSJHVWrbbQU0p3AHdExM7AgcCZwPeAzVr56HXAfwI3rFX+s5TSZcUFETEIOAYYDGwH3B8Rn04pvV/Kl5AkqbMrZerXWyNiPvBzYHPgeKBna59LKU0H/lZiHIcBN6eU3k0pvQTMB/Yo8bOSJHV6pdxD/wnwxAZsLX8zIo4HZgLfSSn9HehL4dG4RguzMkmSVIJS7qHPA86NiGsAImJgRBzSzuv9EtgJqAVeA37a1hNExKkRMTMiZi5ZsqSdYUiSlC+lJPT/Ad4D/inbfwW4qD0XSyktSim9n1JaA/wXH3SrvwJsX3Rov6ysuXNck1KqTynV9+7duz1hSJKUO6Uk9J1SSv9B4Vl0UkorgWjPxSJi26LdIygsxwpwJ3BMRGwaEQOAgcBj7bmGJEmdUSn30N+LiM3IVliLiJ2Ad1v7UET8FhgJbBMRC4HzgZERUZud62WgASCl9HRETKTQvb8a+IYj3CVJKl0pCf184F5g+4i4ERgOnNDah1JKxzZT/OsWjr8YuLiEeCRJ0lpKeQ79voiYDexFoav9jJTSG2WPTJIklWyd99CziWSIiKHAjhRGpb8K7JCVSZKkKtFSC/0s4FSaf7QsAfuWJSJJktRm60zoKaVTs7+jOi4cSZLUHqVM/To3Wzhlp44ISJIktV0pz6EfS
mEt9IkR8XhEfDcidihzXJIkqQ1aTegppQUppf9IKe0OfBkYArxU9sgkSVLJSnkOnYjYERibvd6nsHyqJEmqEq0m9Ih4FKgBfgeMSSm9WPaoJElSm5TSQj8+pfRc2SORJEntVsqguDcj4tcRcQ9ARAyKiJPLHJckSWqDUhL6dcAfgO2y/T8DZ5YrIEmS1HalJPRtUkoTgTUAKaXVFAbGSZKkKlFKQl8REb34YPnUvYC3yhqVJElqk1IGxZ0F3AnsFBEPA72BL5U1KkmS1CalLJ86OyI+D3yGwvKpzwF7lDswSZJUunUm9IjoChwN9AXuSSk9HRGHANcAmwF1HRNifjTc1dC0PeHQCRWMRJKUNy210H8NbA88BvwiIl4FdgfOTSnd3hHBSZKk0rSU0OuBISmlNRHRDXgd2CmltLRjQpMkSaVqaZT7eymlxkfV3gFeNJlLklSdWmqh7xwRc7PtoDDKfW62nVJKQ8oenSRJKklLCf2zHRaFJElaL+tM6CmlBR0ZiCRJar9SZoqTJElVzoQuSVIOrDOhR8QD2d9/77hwJElSe7Q0KG7biPgn4IsRcTOF0e1NUkqzyxqZJEkqWUsJ/YfAD4B+wOVrvZeAfcsVlCRJapuWRrlPAiZFxA9SShd2YEySJKmNSllt7cKI+CIwIiuallK6u7xhSZKktmh1lHtE/AQ4A5iXvc6IiB+XOzBJklS6VlvowMFAbeO87hFxPfAE8P1yBiZJkkpX6nPoWxVt9yhHIJIkqf1KaaH/BHgiIqZSeHRtBDC+rFFJkqQ2KWVQ3G8jYhowLCs6J6X0elmj6gQa7mpo2p5w6IQKRiJJyoNSWuiklF4D7ixzLFpbQ0PrxwBM8AeBJHV2zuUuSVIOmNAlScqBFhN6RHSNiGc7KhhJktQ+LSb0lNL7wHMRsUMHxSNJktqhlEFxPYGnI+IxYEVjYUrpi2WLSpIktUkpCf0HZY9CkiStl1KeQ38wInYEBqaU7o+IzYGu5Q9NkiSVqpTFWb4GTAIaH3buC9xezqAkSVLblPLY2jeA4cAygJTS88AnyhmUJElqm1IS+rsppfcadyJiEyCVLyRJktRWpST0ByPi+8BmEfEF4HfAXeUNS5IktUUpCX08sAT4E9AATAbOK2dQkiSpbUoZ5b4mIq4HHqXQ1f5cSqnVLveIuBY4BFicUtolK9sauAXoD7wMHJ1S+ntEBPBz4CBgJXBCSml2u76RJEmdUCmj3A8GXgCuBP4TmB8RB5Zw7uuAA9YqGw88kFIaCDzAB+uqHwgMzF6nAr8sJXhJklRQSpf7T4FRKaWRKaXPA6OAn7X2oZTSdOBvaxUfBlyfbV8PHF5UfkMqeATYKiK2LeULSJKk0maKW55Sml+0/yKwvJ3X65OtrQ7wOtAn2+4L/LXouIVZ2Wuoda6bLkmd3joTekQcmW3OjIjJwEQK99DHAI+v74VTSiki2vz4W0ScSqFbnh12cM0YSZKg5Rb6oUXbi4DPZ9tLgM3aeb1FEbFtSum1rEt9cVb+CrB90XH9srKPSCldA1wDUF9fn6vn4Rvu+qClPeFQW9OSpNKtM6GnlE4sw/XuBMYBl2R/7ygq/2ZE3AzsCbxV1DVfEcXJVZKkatfqPfSIGAB8i8KjZk3Ht7Z8akT8FhgJbBMRC4HzKSTyiRFxMrAAODo7fDKFR9bmU3hsrRw/JiRJyq1SBsXdDvyawuxwa0o9cUrp2HW8NbqZYxOFOeMlSVI7lJLQ30kpXVn2SCRJUruVktB/HhHnA1OAdxsLnclNkqTqUUpC3xX4KrAvH3S5p2xfkiRVgVIS+hjgU8VLqEqSpOpSytSvTwFblTsQSZLUfqW00LcCno2Ix/nwPfQWH1uTJEkdp5SEfn7Zo5AkSeullPXQH+yIQCRJUvuVMlPccgqj2gE+BtQAK1JKHy9nYJIkqXSltNC3bNyOiKCwdvle5QxKkiS1TSmj3JukgtuB/csUjyRJaodSutyPLNrtAtQD75QtI
kmS1GaljHIvXhd9NfAyhW53SZJUJUq5h+5SppIkVbl1JvSI+GELn0sppQvLEE+n1HBXQwddqMTrTJhQ3jgkSRtcSy30Fc2UbQGcDPQCTOiSJFWJdSb0lNJPG7cjYkvgDOBE4Gbgp+v6nCRJ6ngt3kOPiK2Bs4DjgOuBoSmlv3dEYJIkqXQt3UO/FDgSuAbYNaX0dodFJUmS2qSliWW+A2wHnAe8GhHLstfyiFjWMeFJkqRStHQPvU2zyEmSpMoxaUuSlAMmdEmScqCUqV/V2TgBjSRtdGyhS5KUAyZ0SZJywIQuSVIOmNAlScoBE7okSTngKPcKOO6q6a0fdHcHLakqScoFW+iSJOWACV2SpBwwoUuSlAMmdEmScsBBcRuR6Qs+GEw3YscRFYxEklRtbKFLkpQDJnRJknLAhC5JUg6Y0CVJygEHxan9XDddkqqGLXRJknLAhC5JUg6Y0CVJygETuiRJOWBClyQpB0zokiTlgAldkqQcqMhz6BHxMrAceB9YnVKqj4itgVuA/sDLwNEppb9XIj5JkjY2lWyhj0op1aaU6rP98cADKaWBwAPZfqc1fcH0D62uJklSS6ppprjDgJHZ9vXANOCcSgVTLUzqkqRSVKqFnoApETErIk7NyvqklF7Ltl8H+lQmNEmSNj6VaqF/LqX0SkR8ArgvIp4tfjOllCIiNffB7AfAqQA77LBD+SOVJGkjUJEWekrplezvYuA2YA9gUURsC5D9XbyOz16TUqpPKdX37t27o0KWJKmqdXhCj4gtImLLxm1gP+Ap4E5gXHbYOOCOjo5NkqSNVSW63PsAt0VE4/VvSindGxGPAxMj4mRgAXB0BWJTObjMqiSVXYcn9JTSi8BuzZQvBUZ3dDySJOWBM8VJkpQDJnRJknLAhC5JUg6Y0CVJygETuiRJOWBClyQpB0zokiTlgAldkqQcMKFLkpQDJvSN1PQF010rXZLUxIQuSVIOmNAlScoBE7okSTlQieVTtQEV30cfseOICkYiSaokW+iSJOWACV2SpBwwoUuSlAMmdEmScsBBcaoeDQ2lHTdhQnnjkKSNkC10SZJywBb6BnTcVU7FWlVs8UvqRGyhS5KUAyZ0SZJywIQuSVIOmNAlScoBB8XliPO6S1LnZQtdkqQcMKFLkpQDdrlr41Pq8+WS1InYQpckKQdsoXciDppbT848J6mK2UKXJCkHTOiSJOWACV2SpBwwoUuSlAMmdEmScsCELklSDvjYWpGGu5ywRJK0cTKh51TxM+dqhTPPScoBu9w7qekLpreY9Ft7X5JUXWyhl+C4q0xskqTqZgtdkqQcsIXeyTm/ewU5N7ykDciELm1oDrKTVAEmdDVxEJwkbby8hy5JUg7YQpeqnffaJZXAhK4WNTdozoF0VWpD37v3B4K0Uam6LveIOCAinouI+RExvtLxSJK0MaiqhB4RXYGrgAOBQcCxETGoslFJklT9qq3LfQ9gfkrpRYCIuBk4DJhX0ai0Tna/59iGvnfvWICOYT13WtWW0PsCfy3aXwjsWaFYtJZSH2tb13GtJfzGzzV3nD8cOpFK/ZAoVaV+wGzo7+EPtvVThd83UkoddrHWRMSXgANSSqdk+18F9kwpfbPomFOBU7PdzwDPredltwHeWM9zqHnWbXlYr+VhvZaH9brh7ZhS6r12YbW10F8Bti/a75eVNUkpXQNcs6EuGBEzU0r1G+p8+oB1Wx7Wa3lYr+VhvXacqhoUBzwODIyIARHxMeAY4M4KxyRJUtWrqhZ6Sml1RHwT+APQFbg2pfR0hcOSJKnqVVVCB0gpTQYmd+AlN1j3vT7Cui0P67U8rNfysF47SFUNipMkSe1TbffQJUlSO3TqhO40s20TEddGxOKIeKqobOuIuC8ins/+9szKIyKuzOp2bkQMLfrMuOz45yNiXCW+SzWJiO0jYmpEzIuIpyPijKzcul0PEdEtIh6LiCezev23rHxARDya1d8t2QBcImLTbH9+9n7/o
nOdm5U/FxH7V+YbVZeI6BoRT0TE3dm+9VppKaVO+aIw6O4F4FPAx4AngUGVjquaX8AIYCjwVFHZfwDjs+3xwL9n2wcB9wAB7AU8mpVvDbyY/e2Zbfes9HercL1uCwzNtrcE/kxh6mPrdv3qNYDu2XYN8GhWXxOBY7LyXwGnZdtfB36VbR8D3JJtD8r+f9gUGJD9v9G10t+v0i/gLOAm4O5s33qt8Kszt9CbpplNKb0HNE4zq3VIKU0H/rZW8WHA9dn29cDhReU3pIJHgK0iYltgf+C+lNLfUkp/B+4DDih/9NUrpfRaSml2tr0ceIbCrInW7XrI6uftbLcmeyVgX2BSVr52vTbW9yRgdEREVn5zSundlNJLwHwK/390WhHRDzgY+O9sP7BeK64zJ/TmppntW6FYNmZ9UkqvZduvA32y7XXVr/Xegqw7so5Ca9K6XU9Zt/AcYDGFHzgvAG+mlFZnhxTXUVP9Ze+/BfTCem3OFcD3gDXZfi+s14rrzAldG1gq9KP52EQ7RUR34FbgzJTSsuL3rNv2SSm9n1KqpTDr5B7AzhUOaaMXEYcAi1NKsyodiz6sMyf0VqeZVUkWZd29ZH8XZ+Xrql/rvRkRUUMhmd+YUvrfrNi63UBSSm8CU4G9KdyiaJyDo7iOmuove78HsBTrdW3DgS9GxMsUblXuC/wc67XiOnNCd5rZDeNOoHE09TjgjqLy47MR2XsBb2Xdx38A9ouIntmo7f2ysk4ru5/4a+CZlNLlRW9Zt+shInpHxFbZ9mbAFyiMT5gKfCk7bO16bazvLwH/l/WM3Akck43WHgAMBB7rmG9RfVJK56aU+qWU+lP4f/P/UkrHYb1WXqVH5VXyRWG08J8p3Ff710rHU+0v4LfAa8AqCve7TqZwL+wB4HngfmDr7NgArsrq9k9AfdF5TqIwAGY+cGKlv1elX8DnKHSnzwXmZK+DrNv1rtchwBNZvT4F/DAr/xSFxDEf+B2waVbeLdufn73/qaJz/WtW388BB1b6u1XLCxjJB6PcrdcKv5wpTpKkHOjMXe6SJOWGCV2SpBwwoUuSlAMmdEmScsCELklSDpjQpSoSEW+3ftR6nf/MiNh8Q1wve374/oiYExFj13rvuoh4KXvvyYgY3c5r1KPAkn0AAAOFSURBVEfEle2NUepMNmn9EEk5cibwG2DlBjhXHUAqTK3anLNTSpMiYhRwDYWJQ9okpTQTmNn+EKXOwxa6VOUiYqeIuDciZkXEjIjYOSu/LlsX/Y8R8WJEfCkr7xIRV0fEs1FYR31yRHwpIk4HtgOmRsTUovNfnLWiH4mIPs1cf+uIuD0Ka68/EhFDIuITFH4YDMta4Tu18BX+H9miG9liKZdGxOPZ+Rqy8psj4uCia16XxTyyaL3tLSLi2iiscf5ERByWlf8+IoZk209ExA+z7R9FxNfaX/PSxsWELlW/a4BvpZR2B74LXF303rYUZpo7BLgkKzsS6E9hvemvUpi/nJTSlcCrwKiU0qjs2C2AR1JKuwHTgeYS4L8BT6SUhgDfp7B062LgFGBGSqk2pfRCC/EfANyebZ9MYaraYcAw4GvZtJ+3AEcDZFMxjwZ+v9Z5/pXCtKF7AKOASyNiC2AGsE9E9ABWU5hrHGCf7DtJnYJd7lIVy1Zg+yfgd4Up3wHYtOiQ21NKa4B5Ra3rzwG/y8pfL26NN+M94O5sexaF+c7X9jngKICU0v9FRK+I+HgJ4V8aET+msOjG3lnZfsCQxt4ECgt1DATuAX4eEZtS+AEwPaX0j6Lv3PjZL0bEd7P9bsAOFBL66cBLFH4EfCEbJzAgpfRcCXFKuWBCl6pbFwrrTK/rPvW7RduxjmNasip9MP/z+2zY/xMa76F/C7gW2J1CjN9KKX1k0ZiImAbsD4ylsIrXRw4Bjlo7SWct+nrgRQprnm9DoafB5T3VqdjlLlWxVFgX/aWIGAOFldkiYrdWPvYwcFR2L70PhQU0Gi0HtmxjGDOA47Lrj
wTeSGut196K/wS6RMT+FFZ/Oy0Ky8USEZ/Ous2h0O1+IoWu8nubOc8fgG9lq9MREY2D8t4D/gqMoXC/fgaFWxN2t6tTMaFL1WXziFhY9DqLQjI9OSKeBJ4GDmvlHLdSWA1vHoWBa7OBt7L3rgHubaUbfm0XALtHxFwK9+nHtXz4h2U9ABcB3wP+O4trdkQ8BUzgg16BKcDngfuzJL22C4EaYG5EPJ3tN5oBLE4p/SPb7pf9lToNV1uTcigiuqeU3o6IXhSWrByeUnq90nFJKh/voUv5dHdEbAV8DLjQZC7lny10SZJywHvokiTlgAldkqQcMKFLkpQDJnRJknLAhC5JUg6Y0CVJyoH/D8aCjRrabxW0AAAAAElFTkSuQmCC\n"
-          },
-          "metadata": {
-            "needs_background": "light"
-          }
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "From this we can see that the model is incorrectly classifying shorter reviews as negative, and incorrectly classifying a lot of longer reviews as positive.\n",
-        "\n",
-        "We could try clipping the original review text before embedding to see if this helps."
-      ],
-      "metadata": {
-        "id": "jFDBkcmtS-k0"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "print(f'Average length of review in the training data: {round(sum([len(x) for x in x_train]) / len(x_train), 1)}')"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "cn7FlKfsTY1y",
-        "outputId": "ae5f483f-8010-44a3-9746-96163d404e96"
-      },
-      "execution_count": 76,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Average length of review in the training data: 761.7\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "train_df = pd.DataFrame({'text': x_train, 'label': y_train})\n",
-        "train_df['length'] = [len(t) for t in train_df.text]\n",
-        "print(f'Average length of negative class reviews: {round(sum(train_df[train_df.label == 0].length) / len(train_df[train_df.label == 0]),1)}')\n",
-        "print(f'Average length of postive class reviews: {round(sum(train_df[train_df.label == 1].length) / len(train_df[train_df.label == 1]),1)}')"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "-4_onFMgWY5u",
-        "outputId": "bcb4fd63-c60b-4c05-bd87-993188c92480"
-      },
-      "execution_count": 85,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Average length of negative class reviews: 428.7\n",
-            "Average length of postive class reviews: 1091.4\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Trim all reviews to first 500 characters\n",
-        "x_train_trimmed = [x[0:500] for x in x_train]\n",
-        "sum([len(x) for x in x_train_trimmed]) / len(x_train_trimmed)"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "MLzJufezYJD8",
-        "outputId": "6a093973-fd66-4d02-9751-07d55c578689"
-      },
-      "execution_count": 87,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "391.8607729181691"
-            ]
-          },
-          "metadata": {},
-          "execution_count": 87
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Do the same for the validation and test sets\n",
-        "x_valid_trimmed = [x[0:500] for x in x_valid]\n",
-        "x_test_trimmed = [x[0:500] for x in x_test]"
-      ],
-      "metadata": {
-        "id": "t7kxBAcrY1RA"
-      },
-      "execution_count": 88,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "def plot_sentence_embeddings_length(text_list, tokenizer):\n",
-        "    tokenized_texts = list(map(lambda t: tokenizer.tokenize(t), text_list))\n",
-        "    tokenized_texts_len = list(map(lambda t: len(t), tokenized_texts))\n",
-        "    print(f\"Average review embedding length: {sum(tokenized_texts_len)/len(tokenized_texts_len)}\")\n",
-        "    fig, ax = plt.subplots(figsize=(8, 5));\n",
-        "    ax.hist(tokenized_texts_len, bins=40);\n",
-        "    ax.set_xlabel(\"Length of Review Embeddings\");\n",
-        "    ax.set_ylabel(\"Number of Reviews\");\n",
-        "    return"
-      ],
-      "metadata": {
-        "id": "fraaRzhWL0vP"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Now check what the distribution of embedding length looks like before choosing a max length param:\n",
-        "plot_sentence_embeddings_length(x_train_trimmed, roberta_tokenizer)"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 352
-        },
-        "id": "ql_UaTVEZDHr",
-        "outputId": "5dcf1741-e76d-4108-d91a-f944b4203e2a"
-      },
-      "execution_count": 90,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Average review embedding length: 91.78568701480508\n"
-          ]
-        },
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "<Figure size 576x360 with 1 Axes>"
-            ],
-            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAgEAAAE9CAYAAACStrEqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de5glVXnv8e9PEC944TYhhEFnxIkcTFBxRLzEGwkMoo4PUSPx0dHDkZwTYvCoMWASiaIR41Eix0gkQsDEAyJeIILiiKjEyGUA5SphuCiDIKMgeAWB9/xRq2E7dvdsZnrvpru+n+fZz65atarqXbt7pt+9alWtVBWSJKl/HjTbAUiSpNlhEiBJUk+ZBEiS1FMmAZIk9ZRJgCRJPWUSIElST2062wGM2zbbbFOLFi2a7TAkSRqLCy644AdVtWCybb1LAhYtWsSqVatmOwxJksYiyXem2ublAEmSempkSUCSY5PcnOTSSba9OUkl2aatJ8mRSVYnuTjJrgN1VyS5qr1WDJQ/NcklbZ8jk2RUbZEkaT4aZU/AccCydQuT7ADsCXx3oHhvYEl7HQAc1epuBRwKPB3YDTg0yZZtn6OA1w/s92vnkiRJUxtZElBVXwNumWTTEcBbgcFJC5YDH6vOOcAWSbYD9gJWVtUtVXUrsBJY1rY9qqrOqW7yg48BLx1VWyRJmo/GOiYgyXLghqr61jqbtgeuH1hf08qmK18zSbkkSRrS2O4OSPJw4G10lwLGKskBdJcZeMxjHjPu00uS9IA0zp6AHYHFwLeSXAcsBC5M8pvADcAOA3UXtrLpyhdOUj6pqjq6qpZW1dIFCya9VVKSpN4ZWxJQVZdU1W9U1aKqWkTXhb9rVd0EnAq8pt0lsDtwW1XdCJwB7JlkyzYgcE/gjLbt9iS7t7sCXgOcMq62SJI0H4zyFsETgG8AT0iyJsn+01Q/HbgGWA38M/CnAFV1C3AYcH57vbOV0ep8tO1zNfD5UbRDkqT5Kt3g+v5YunRp+cRASVJfJLmgqpZOts0nBkqS1FO9mztAG2/RwacNVe+6w/cZcSSSpI1hT4AkST1lEiBJUk+ZBEiS1FMmAZIk9ZRJgCRJPWUSIElST5kESJLUUyYBkiT1lEmAJEk9ZRIgSVJPmQRIktRTJgGSJPWUSYAkST1lEiBJUk+ZBEiS1FMmAZIk9ZRJgCRJPWUSIElST2062wFo/lp08GlD1bvu8H1GHIkkaTL2BEiS1FMmAZIk9ZRJgCRJPWUSIElST5kESJLUUyYBkiT1lEmAJEk9ZRIgSVJPjSwJSHJskpuTXDpQ9r4k305ycZLPJNliYNshSVYnuTLJXgPly1rZ6iQHD5QvTnJuK/9Eks1G1RZJkuajUfYEHAcsW6dsJfA7VbUL8F/AIQBJdgZeCTyx7fPhJJsk2QT4R2BvYGdgv1YX4L3AEVX1eOBWYP8RtkWSpHlnZElAVX0NuGWdsi9W1V1t9RxgYVteDpxYVXdU1bXAamC39lpdVddU1Z3AicDyJAFeAJzc9j8eeOmo2iJJ0nw0m2MC/jvw+ba8PXD9wLY1rWyq8q2BHw0kFBPlkiRpSLOSBCT5K+Au4ONjOt8BSVYlWbV27dpxnFKSpAe8sScBSV4LvAh4VVVVK74B2GGg2sJWNlX5D4Etkmy6TvmkquroqlpaVUsXLFgwI+2QJGmuG2sSkGQZ8FbgJVX1s4FNpwKvTPKQJIuBJcB5wPnAknYnwGZ0gwdPbcnDWcDL2v4rgFPG1Q5JkuaDUd4ieALwDeAJSdYk2R/4EPBIYGWSbyb5J4Cqugw4Cbgc+AJwYFXd3a75/xlwBnAFcFKrC/CXwJuSrKYbI3DMqNoiSdJ8tOn6q2yYqtpvkuIp/1BX1buBd09Sfjpw+iTl19DdPSBJkjaATwyUJKmnTAIkSeopkwBJknrKJECSpJ4yCZAkqadMAiRJ6
imTAEmSesokQJKknjIJkCSpp0wCJEnqKZMASZJ6yiRAkqSeMgmQJKmnTAIkSeopkwBJknrKJECSpJ4yCZAkqadMAiRJ6imTAEmSesokQJKknjIJkCSpp0wCJEnqKZMASZJ6yiRAkqSeMgmQJKmnTAIkSeopkwBJknrKJECSpJ4yCZAkqadGlgQkOTbJzUkuHSjbKsnKJFe19y1beZIcmWR1kouT7Dqwz4pW/6okKwbKn5rkkrbPkUkyqrZIkjQfjbIn4Dhg2TplBwNnVtUS4My2DrA3sKS9DgCOgi5pAA4Fng7sBhw6kTi0Oq8f2G/dc0mSpGmMLAmoqq8Bt6xTvBw4vi0fD7x0oPxj1TkH2CLJdsBewMqquqWqbgVWAsvatkdV1TlVVcDHBo4lSZKGMO4xAdtW1Y1t+SZg27a8PXD9QL01rWy68jWTlEuSpCHN2sDA9g2+xnGuJAckWZVk1dq1a8dxSkmSHvDGnQR8v3Xl095vbuU3ADsM1FvYyqYrXzhJ+aSq6uiqWlpVSxcsWLDRjZAkaT4YdxJwKjAxwn8FcMpA+WvaXQK7A7e1ywZnAHsm2bINCNwTOKNtuz3J7u2ugNcMHEuSJA1h01EdOMkJwPOAbZKsoRvlfzhwUpL9ge8Ar2jVTwdeCKwGfga8DqCqbklyGHB+q/fOqpoYbPindHcgPAz4fHtJkqQhjSwJqKr9pti0xyR1CzhwiuMcCxw7Sfkq4Hc2JkZJkvrsfl0OSPKgJI8aVTCSJGl81psEJPl/SR6VZHPgUuDyJH8x+tAkSdIoDdMTsHNV3U73MJ7PA4uBV480KkmSNHLDJAEPTvJguiTg1Kr6JWO6v1+SJI3OMEnAR4DrgM2BryV5LHD7KIOSJEmjt94koKqOrKrtq+qFbRT/d4Hnjz40SZI0Suu9RTDJ1cA5wNnA2VV1GXDXqAOTJEmjNdTAQLpLAlsD70tydZLPjDYsSZI0asMkAXcDv2zv99A97//mafeQJEkPeMM8MfB24BLgA8A/V9UPRxuSJEkah2F6AvYDvkb3rP4Tk7wjya89+leSJM0t6+0JqKpTgFOS7ATsDbwReCvdxD2SJGmOGuaxwZ9Kshr4IPBwuml7txx1YJIkabSGGRPwHuCiqrp71MFIkqTxGWZMwOXAIUmOBkiyJMmLRhuWJEkatWGSgH8B7gSe2dZvAN41sogkSdJYDJME7FhVf0/3rACq6mdARhqVJEkauWGSgDuTPIw2c2CSHYE7RhqVJEkauWEGBh4KfAHYIcnHgWcBrx1lUJIkafSGeU7AyiQXArvTXQY4qKp+MPLIJEnSSE15OaA9HIgkuwKPBW4Evgc8ppVJkqQ5bLqegDcBBwDvn2RbAS8YSUSSJGkspkwCquqA9v788YUjSZLGZZjHBl+c5JB2V4AkSZonhrlF8MXA3cBJSc5P8pYkjxlxXJIkacTWmwRU1Xeq6u+r6qnAHwO7ANeOPDJJkjRSwzwngCSPBf6ove6mm0pYkiTNYetNApKcCzwY+CTw8qq6ZuRRSZKkkRumJ+A1VXXlyCORJEljNczAwB8lOSbJ5wGS7Jxk/405aZL/neSyJJcmOSHJQ5MsTnJuktVJPpFks1b3IW19ddu+aOA4h7TyK5PstTExSZLUN8MkAccBZwC/1db/C3jjhp4wyfbAnwNLq+p3gE2AVwLvBY6oqscDtwITicb+wK2t/IhWjyQ7t/2eCCwDPpxkkw2NS5KkvhkmCdimqk4C7gGoqrvoBgdujE2BhyXZFHg43SOJXwCc3LYfD7y0LS9v67TteyRJKz+xqu6oqmuB1cBuGxmXJEm9MUwS8NMkW3PfVMK7A7dt6Amr6gbg/wDfpfvjfxtwAfCjlmAArAG2b8vbA9e3fe9q9bceLJ9kn1+R5IAkq5KsWrt27YaGLknSvDJMEvAm4FRgxyRfBz4GvGFDT5hkS7pv8YvpLjFsTtedPzJVdXRVLa2qpQsWLBjlqSRJmjOGmUr4wiTPB
Z5AN5XwlWxct/vvA9dW1VqAJJ8GngVskWTT9m1/IXBDq38DsAOwpl0+eDTww4HyCYP7SJKk9ZhuKuFNkuyX5C3AE6rqMmAR8FXgQxtxzu8Cuyd5eLu2vwdwOXAW8LJWZwVwSls+ta3Ttn+5qqqVv7LdPbAYWAKctxFxSZLUK9P1BBxD9037POD/Jvke8FTgkKr67IaesKrOTXIycCFwF3ARcDRwGnBikne1smMG4vjXJKuBW+juCKCqLktyEl0CcRdwYFVt7IBFSZJ6Y7okYCmwS1Xdk+ShwE3AjlX1w409aVUdChy6TvE1THKZoap+Abx8iuO8G3j3xsYjSVIfTTcw8M6qmrgt8BfANTORAEiSpAeG6XoCdkpycVsO3d0BF7flqqpdRh6dJEkamemSgP82tigkSdLYTZkEVNV3xhmIJEkar2EeFiRJkuYhkwBJknpquocFndne3zu+cCRJ0rhMNzBwuyTPBF6S5ES6uwLuVVUXjjQySZI0UtMlAW8H/obumfwfWGdb0U39K0mS5qjp7g44GTg5yd9U1WFjjEmSJI3BMLMIHpbkJcBzWtFXqupzow1LkiSN2nrvDkjyHuAguol6LgcOSvJ3ow5MkiSN1np7AoB9gCdPzCOQ5Hi6Wf7eNsrAJEnSaA37nIAtBpYfPYpAJEnSeA3TE/Ae4KIkZ9HdJvgc4OCRRiVJkkZumIGBJyT5CvC0VvSXVXXTSKOSJEkjN0xPAFV1I3DqiGORJElj5NwBkiT1lEmAJEk9NW0SkGSTJN8eVzCSJGl8pk0Cqupu4MokjxlTPJIkaUyGGRi4JXBZkvOAn04UVtVLRhaVJEkauWGSgL8ZeRSSJGnshnlOwFeTPBZYUlVfSvJwYJPRhyZJkkZpmAmEXg+cDHykFW0PfHaUQUmSpNEb5hbBA4FnAbcDVNVVwG+MMihJkjR6wyQBd1TVnRMrSTYFanQhSZKkcRgmCfhqkrcBD0vyB8AngX8fbViSJGnUhkkCDgbWApcAfwKcDvz1KIOSJEmjt94koKruAY4HDgPeARxfVRt1OSDJFklOTvLtJFckeUaSrZKsTHJVe9+y1U2SI5OsTnJxkl0HjrOi1b8qyYqNiUmSpL4Z5u6AfYCrgSOBDwGrk+y9kef9IPCFqtoJeBJwBV2Pw5lVtQQ4s60D7A0saa8DgKNaXFsBhwJPB3YDDp1IHCRJ0voNczng/cDzq+p5VfVc4PnAERt6wiSPBp4DHANQVXdW1Y+A5XQ9DrT3l7bl5cDHqnMOsEWS7YC9gJVVdUtV3QqsBJZtaFySJPXNMEnAj6tq9cD6NcCPN+Kci+nGGPxLkouSfDTJ5sC2VXVjq3MTsG1b3h64fmD/Na1sqvJfk+SAJKuSrFq7du1GhC5J0vwxZRKQZN8k+wKrkpye5LXtuvu/A+dvxDk3BXYFjqqqp9DNR3DwYIU25mDGbkOsqqOramlVLV2wYMFMHVaSpDltup6AF7fXQ4HvA88Fnkf3Lf5hG3HONcCaqjq3rZ9MlxR8v3Xz095vbttvAHYY2H9hK5uqXJIkDWHKuQOq6nWjOGFV3ZTk+iRPqKorgT2Ay9trBXB4ez+l7XIq8GdJTqQbBHhbVd2Y5Azg7wYGA+4JHDKKmCVJmo/WO4FQksXAG4BFg/U3cirhNwAfT7IZ3RiD19H1SpyUZH/gO8ArWt3TgRcCq4GftbpU1S1JDuO+SxPvrKpbNiImSZJ6ZZiphD9LN5L/34F7ZuKkVfVNYOkkm/aYpG7RzV8w2XGOBY6diZgkSeqbYZKAX1TVkSOPRJIkjdUwScAHkxwKfBG4Y6Kwqi4cWVSSJGnkhkkCfhd4NfAC7rscUG1dkiTNUcMkAS8HHjc4nbAkSZr7hnli4KXAFqMORJIkjdcwPQFbAN9Ocj6/OiZgY24RlCRJs2yYJODQkUchSZLGbr1JQFV9dRyBSJKk8RrmiYE/5r7JfDYDHgz8tKoeNcrAJEnSaA3TE/DIieUkAZYDu
48yKEmSNHrD3B1wr+p8FthrRPFIkqQxGeZywL4Dqw+ie+b/L0YWkSRJGoth7g548cDyXcB1dJcEJEnSHDbMmIDXjSMQSZI0XlMmAUnePs1+VVWHjSAeSZI0JtP1BPx0krLNgf2BrQGTAEmS5rApk4Cqev/EcpJHAgcBrwNOBN4/1X6SJGlumHZMQJKtgDcBrwKOB3atqlvHEZgkSRqt6cYEvA/YFzga+N2q+snYopIkSSM33cOC3gz8FvDXwPeS3N5eP05y+3jCkyRJozLdmID79TRBSZI0t/iHXpKknjIJkCSpp0wCJEnqKZMASZJ6yiRAkqSeGmYWQY3RooNPG6redYfvM+JIJEnznT0BkiT11KwlAUk2SXJRks+19cVJzk2yOsknkmzWyh/S1le37YsGjnFIK78yyV6z0xJJkuam2ewJOAi4YmD9vcARVfV44Fa62Qpp77e28iNaPZLsDLwSeCKwDPhwkk3GFLskSXPerIwJSLIQ2Ad4N/CmJAFeAPxxq3I88LfAUcDytgxwMvChVn85cGJV3QFcm2Q1sBvwjTE1434Z9lr/KI7n+AFJ0mRmqyfgH4C3Ave09a2BH1XVXW19DbB9W94euB6gbb+t1b+3fJJ9JEnSeow9CUjyIuDmqrpgjOc8IMmqJKvWrl07rtNKkvSANhs9Ac8CXpLkOuBEussAHwS2SDJxeWIhcENbvgHYAaBtfzTww8HySfb5FVV1dFUtraqlCxYsmNnWSJI0R409CaiqQ6pqYVUtohvY9+WqehVwFvCyVm0FcEpbPrWt07Z/uaqqlb+y3T2wGFgCnDemZkiSNOc9kB4W9JfAiUneBVwEHNPKjwH+tQ38u4UucaCqLktyEnA5cBdwYFXdPf6wJUmam2Y1CaiqrwBfacvX0I3uX7fOL4CXT7H/u+nuMJAkSfeTTwyUJKmnTAIkSeopkwBJknrKJECSpJ4yCZAkqaceSLcIakRmet4CSdL8YE+AJEk9ZRIgSVJPmQRIktRTJgGSJPWUSYAkST1lEiBJUk+ZBEiS1FMmAZIk9ZRJgCRJPWUSIElST5kESJLUUyYBkiT1lEmAJEk9ZRIgSVJPmQRIktRTJgGSJPWUSYAkST1lEiBJUk+ZBEiS1FMmAZIk9ZRJgCRJPWUSIElST5kESJLUU2NPApLskOSsJJcnuSzJQa18qyQrk1zV3rds5UlyZJLVSS5OsuvAsVa0+lclWTHutkiSNJfNRk/AXcCbq2pnYHfgwCQ7AwcDZ1bVEuDMtg6wN7CkvQ4AjoIuaQAOBZ4O7AYcOpE4SJKk9Rt7ElBVN1bVhW35x8AVwPbAcuD4Vu144KVteTnwseqcA2yRZDtgL2BlVd1SVbcCK4FlY2yKJElz2qyOCUiyCHgKcC6wbVXd2DbdBGzblrcHrh/YbU0rm6pckiQNYdaSgCSPAD4FvLGqbh/cVlUF1Aye64Akq5KsWrt27UwdVpKkOW1WkoAkD6ZLAD5eVZ9uxd9v3fy095tb+Q3ADgO7L2xlU5X/mqo6uqqWVtXSBQsWzFxDJEmaw2bj7oAAxwBXVNUHBjadCkyM8F8BnDJQ/pp2l8DuwG3tssEZwJ5JtmwDAvdsZZIkaQibzsI5nwW8GrgkyTdb2duAw4GTkuwPfAd4Rdt2OvBCYDXwM+B1AFV1S5LDgPNbvXdW1S3jaYIkSXPf2JOAqvoPIFNs3mOS+gUcOMWxjgWOnbnoJEnqD58YKElST5kESJLUUyYBkiT1lEmAJEk9ZRIgSVJPmQRIktRTJgGSJPWUSYAkST1lEiBJUk+ZBEiS1FMmAZIk9ZRJgCRJPWUSIElST5kESJLUUyYBkiT1lEmAJEk9telsByAtOvi0oepdd/g+I45EkvrFngBJknrKJECSpJ4yCZAkqadMAiRJ6imTAEmSesokQJKknjIJkCSpp0wCJEnqKZMASZJ6yiRAkqSeMgmQJKmnnDtgIw373HtJkh5o5nxPQJJlSa5MsjrJwbMdj
yRJc8WcTgKSbAL8I7A3sDOwX5KdZzcqSZLmhjmdBAC7Aaur6pqquhM4EVg+yzFJkjQnzPUxAdsD1w+srwGePkuxaMSGHX9x3eH7jDgSSZof5noSMJQkBwAHtNWfJLlyhg69DfCDGTrWXPKAbnfeO5LDPqDbPEJ9bLdt7o++tPuxU22Y60nADcAOA+sLW9mvqKqjgaNn+uRJVlXV0pk+7gNdH9vdxzZDP9ttm/ujr+0eNNfHBJwPLEmyOMlmwCuBU2c5JkmS5oQ53RNQVXcl+TPgDGAT4NiqumyWw5IkaU6Y00kAQFWdDpw+S6ef8UsMc0Qf293HNkM/222b+6Ov7b5Xqmq2Y5AkSbNgro8JkCRJG8gkYAPN18cVJzk2yc1JLh0o2yrJyiRXtfctW3mSHNk+g4uT7Dp7kW+4JDskOSvJ5UkuS3JQK5/v7X5okvOSfKu1+x2tfHGSc1v7PtEG3ZLkIW19ddu+aDbj3xhJNklyUZLPtfU+tPm6JJck+WaSVa1svv+Ob5Hk5CTfTnJFkmfM9zbfXyYBG2CeP674OGDZOmUHA2dW1RLgzLYOXfuXtNcBwFFjinGm3QW8uap2BnYHDmw/z/ne7juAF1TVk4AnA8uS7A68Fziiqh4P3Ars3+rvD9zayo9o9eaqg4ArBtb70GaA51fVkwdui5vvv+MfBL5QVTsBT6L7mc/3Nt8/VeXrfr6AZwBnDKwfAhwy23HNYPsWAZcOrF8JbNeWtwOubMsfAfabrN5cfgGnAH/Qp3YDDwcupHvi5g+ATVv5vb/rdHfhPKMtb9rqZbZj34C2LqT7z/8FwOeAzPc2t/ivA7ZZp2ze/o4DjwauXffnNZ/bvCEvewI2zGSPK95+lmIZh22r6sa2fBOwbVued59D6+59CnAuPWh36xb/JnAzsBK4GvhRVd3Vqgy27d52t+23AVuPN+IZ8Q/AW4F72vrWzP82AxTwxSQXtKeowvz+HV8MrAX+pV36+WiSzZnfbb7fTAJ0v1SXIs/LW0qSPAL4FPDGqrp9cNt8bXdV3V1VT6b7drwbsNMshzRSSV4E3FxVF8x2LLPg2VW1K12394FJnjO4cR7+jm8K7AocVVVPAX7KfV3/wLxs8/1mErBhhnpc8Tzy/STbAbT3m1v5vPkckjyYLgH4eFV9uhXP+3ZPqKofAWfRdYVvkWTiGSKDbbu33W37o4EfjjnUjfUs4CVJrqObdfQFdNeN53ObAaiqG9r7zcBn6JK++fw7vgZYU1XntvWT6ZKC+dzm+80kYMP07XHFpwIr2vIKumvmE+WvaaNqdwduG+hmmzOSBDgGuKKqPjCwab63e0GSLdryw+jGQVxBlwy8rFVbt90Tn8fLgC+3b1JzRlUdUlULq2oR3b/bL1fVq5jHbQZIsnmSR04sA3sClzKPf8er6ibg+iRPaEV7AJczj9u8QWZ7UMJcfQEvBP6L7hrqX812PDPYrhOAG4Ff0mXS+9NdAz0TuAr4ErBVqxu6uySuBi4Bls52/BvY5mfTdQleDHyzvV7Yg3bvAlzU2n0p8PZW/jjgPGA18EngIa38oW19ddv+uNluw0a2/3nA5/rQ5ta+b7XXZRP/Z/Xgd/zJwKr2O/5ZYMv53ub7+/KJgZIk9ZSXAyRJ6imTAEmSesokQJKknjIJkCSpp0wCJEnqKZMAaSMk+cmIj//GJA+fifO1GfG+1GaR+6N1th2X5Nq27VtJ9tjAcyxNcuSGxjjFMe9ucU28hp61M8nz0mYK3MBzT7l/m5Vvm7b8nxt6Dmk2bbr+KpJm0RuBfwN+NgPHegpAdY8JnsxfVNXJSZ4PHE03m9r9UlWr6O7Lnkk/nybmB4SqeuZsxyBtCHsCpBmWZMckX2gTtZydZKdWflybr/w/k1yT5GWt/EFJPtzmPF+Z5PQkL0vy58BvAWclOWvg+O9u39bPSbLtJOffKsln25zo5yTZJclv0CUTT2vfpnecpgnfoE2c0iYYel+S89vx/qSVn5hkn
4FzHtdivvebc3tK3bFJzmsTuCxv5acl2aUtX5Tk7W35nUlefz8+5+uSvKe1Z1WSXZOckeTqJP9zoOqj2jmvTPJPSR7U9t8zyTeSXJjkk+nmjiDJsvazuBDYd+B8Wyf5YpLLknyU7uEyE9t+0t6fl+QruW8O+48nSdv2wlZ2Qfs9mPicnjvQy3FR2pP9pHEwCZBm3tHAG6rqqcBbgA8PbNuO7gmFLwIOb2X70k3fvDPwarrn91NVRwLfo5sD/vmt7ubAOVX1JOBrwGR/NN8BXFRVuwBvAz5W3fPi/wdwdnXzyV89TfzL6J6uBt0TI2+rqqcBTwNen2Qx8AngFQDpHp29B3DaOsf5K7rH7O4GPB94X7pH1p4N/F6SRwN30T3PH+D3WpvW9bB1LgcMXsr4buslOBs4ju7Rvru3z2DCbsAb6D7fHYF9Wzf+XwO/X92kOquANyV5KPDPwIuBpwK/OXCcQ4H/qKon0j17/zFTfH5PoevB2ZnuSX3Pasf9CLB3+71YMFD/LcCBrR2/B/x8iuNKM87LAdIMat8mnwl8sn0BBHjIQJXPVtU9wOUD3+KfDXyyld80+K1/EncCE9eoL6B73v+6ng38IUBVfbl9g33UEOG/L8nf0U2c8oxWtiewy0SvBd0EOkuAzwMfTPIQuqTha1X184E2T+z7kiRvaesPpfvDeTbw53RzvZ8G/EG6cQ+Lq+rKSeKa7nLAxJwdlwCPqKofAz9OckfavAjAeVV1DUCSE+g+n1/Q/ZH+eot5M7oekJ2Aa6vqqlb/34CJaXefQ+sZqKrTktw6RUznVdWatv836RK8nwDXVNW1rc4JA8f9OvCBJB8HPj2xrzQOJgHSzHoQ3dz0U/3RumNgOVPUmc4v675nfd/NzP4bnhgT8AbgWLpvwqHr1Thj3cpJvgLsBfwR3Yx8v1YF+MN1/7C3noOlwDXASmAbuh6NDZned+LzvIdf/Wzv4b7PZt1no1eLbWVV7bdObDMx9mAwjvX+jKrq8CSn0c1X8fUke1XVt2cgDmm9vBwgzaCquh24NsnLoZuhMMmT1rPb14E/bGMDtqWb2GbCj4H7e434bOBV7fzPA37Q4hrWh4AHJdkLOAP4X+mmWibJb7cufeguCbyOrgv7C5Mc5wzgDeuikhIAAAGUSURBVAPXxCcGJt4JXA+8nO7b99l0XeKTXQqYCbulm/HzQXQJy38A59B10z++xbZ5kt8Gvg0sGhgzMZgkfA3441Z/b7rJaIZ1JfC4JIva+r2XNJLsWFWXVNV76WYo3el+tk/aYCYB0sZ5eJI1A6830f0B3j/JxIxty9dzjE/Rzdh4Od3gvQuB29q2o4EvrOcSwbr+Fnhqkovpxh2smL76r2o9De8C3gp8tMV1YZJL6a5rT3yz/SLwXOBL7Q/7ug4DHgxcnOSytj7hbODmqvp5W17Y3iez7piAw6eoN5Xz6RKbK+guQXymqtYCrwVOaJ/TN4CdquoXdN30p7WBgTcPHOcdwHNaW/YFvjtsAK2df0r3s7yALrmb+Bm/McmlLY5f0l1qkcbCWQSlB4Akj6iqnyTZmm7K2mdVNx+65omBn/HElLVXVdURsx2X+s0xAdIDw+faQLbNgMNMAOal1ydZQfczvoiuV0WaVfYESJLUU44JkCSpp0wCJEnqKZMASZJ6yiRAkqSeMgmQJKmnTAIkSeqp/w/49wWMjOObtAAAAABJRU5ErkJggg==\n"
-          },
-          "metadata": {
-            "needs_background": "light"
-          }
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Now try with an encoding max_length of 64, 128 and 192 tokens\n",
-        "max_lengths = [64,128,192]\n",
-        "for length in max_lengths:\n",
-        "  run_roberta(length, roberta_tokenizer, str.format('roberta_model_500_trimmed_{length}', length = length), \n",
-        "                                                    x_train = x_train_trimmed,\n",
-        "                                                    y_train = y_train,\n",
-        "                                                    x_valid = x_valid_trimmed,\n",
-        "                                                    y_valid = y_valid,\n",
-        "                                                    x_test = x_test_trimmed,\n",
-        "                                                    y_test = y_test)"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "fZsxHmQNZcuB",
-        "outputId": "8b28f357-361c-4f88-9d6a-aec86741eafb"
-      },
-      "execution_count": 98,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "This model will be saved as roberta_model_500_trimmed_64\n",
-            "Running roBERTa for encoding max_length: 64\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 64)\n",
-            "Created encoding for validation data with shape (5893, 64)\n",
-            "Created encoding for test data with shape (5894, 64)\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "1474/1474 [==============================] - 162s 107ms/step - loss: 0.6673 - accuracy: 0.6195 - val_loss: 0.6405 - val_accuracy: 0.6671\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 154s 104ms/step - loss: 0.6270 - accuracy: 0.6785 - val_loss: 0.6000 - val_accuracy: 0.7209\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 154s 104ms/step - loss: 0.6084 - accuracy: 0.6943 - val_loss: 0.5796 - val_accuracy: 0.7231\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 154s 104ms/step - loss: 0.5964 - accuracy: 0.7026 - val_loss: 0.5660 - val_accuracy: 0.7329\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 16s 88ms/step - loss: 0.5790 - accuracy: 0.7126\n",
-            "Test loss: 0.578998327255249\n",
-            "Test accuracy: 0.7125890851020813\n",
-            "185/185 [==============================] - 19s 86ms/step\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.73      0.70      0.71      2992\n",
-            "           1       0.70      0.73      0.71      2902\n",
-            "\n",
-            "    accuracy                           0.71      5894\n",
-            "   macro avg       0.71      0.71      0.71      5894\n",
-            "weighted avg       0.71      0.71      0.71      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_64/assets\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_64/assets\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "This model will be saved as roberta_model_500_trimmed_128\n",
-            "Running roBERTa for encoding max_length: 128\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 128)\n",
-            "Created encoding for validation data with shape (5893, 128)\n",
-            "Created encoding for test data with shape (5894, 128)\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "1474/1474 [==============================] - 296s 197ms/step - loss: 0.6277 - accuracy: 0.6809 - val_loss: 0.5895 - val_accuracy: 0.7349\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 288s 196ms/step - loss: 0.5846 - accuracy: 0.7258 - val_loss: 0.5603 - val_accuracy: 0.7433\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 287s 195ms/step - loss: 0.5693 - accuracy: 0.7349 - val_loss: 0.5434 - val_accuracy: 0.7473\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 287s 195ms/step - loss: 0.5605 - accuracy: 0.7401 - val_loss: 0.5353 - val_accuracy: 0.7478\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 30s 162ms/step - loss: 0.5441 - accuracy: 0.7396\n",
-            "Test loss: 0.5440837144851685\n",
-            "Test accuracy: 0.7395656704902649\n",
-            "185/185 [==============================] - 32s 159ms/step\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.72      0.79      0.76      2992\n",
-            "           1       0.76      0.68      0.72      2902\n",
-            "\n",
-            "    accuracy                           0.74      5894\n",
-            "   macro avg       0.74      0.74      0.74      5894\n",
-            "weighted avg       0.74      0.74      0.74      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_128/assets\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_128/assets\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "This model will be saved as roberta_model_500_trimmed_192\n",
-            "Running roBERTa for encoding max_length: 192\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 192)\n",
-            "Created encoding for validation data with shape (5893, 192)\n",
-            "Created encoding for test data with shape (5894, 192)\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "1474/1474 [==============================] - 443s 297ms/step - loss: 0.6231 - accuracy: 0.6854 - val_loss: 0.5866 - val_accuracy: 0.7356\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 435s 295ms/step - loss: 0.5829 - accuracy: 0.7284 - val_loss: 0.5555 - val_accuracy: 0.7478\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 435s 295ms/step - loss: 0.5662 - accuracy: 0.7370 - val_loss: 0.5387 - val_accuracy: 0.7539\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 435s 295ms/step - loss: 0.5585 - accuracy: 0.7416 - val_loss: 0.5296 - val_accuracy: 0.7573\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 45s 243ms/step - loss: 0.5404 - accuracy: 0.7508\n",
-            "Test loss: 0.540351152420044\n",
-            "Test accuracy: 0.750763475894928\n",
-            "185/185 [==============================] - 47s 239ms/step\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.76      0.75      0.75      2992\n",
-            "           1       0.74      0.76      0.75      2902\n",
-            "\n",
-            "    accuracy                           0.75      5894\n",
-            "   macro avg       0.75      0.75      0.75      5894\n",
-            "weighted avg       0.75      0.75      0.75      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_192/assets\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_192/assets\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Try selecting the middle 500 characters instead:\n",
-        "import math \n",
-        "\n",
-        "def select_middle(data, text_length = 500):\n",
-        "  trimmed_data = []\n",
-        "  for item in data:\n",
-        "    middle_index = math.floor(len(item) / 2)\n",
-        "    if len(item) > text_length:\n",
-        "      lower_bound = middle_index - math.floor(text_length / 2)\n",
-        "      upper_bound = middle_index + math.floor(text_length / 2)\n",
-        "      trimmed_data.append(item[lower_bound:upper_bound])\n",
-        "    else: \n",
-        "      trimmed_data.append(item)\n",
-        "  return trimmed_data"
-      ],
-      "metadata": {
-        "id": "bWWQOpLdfOGZ"
-      },
-      "execution_count": 9,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "x_train_trimmed_middle = select_middle(x_train, 300)\n",
-        "x_valid_trimmed_middle = select_middle(x_valid, 300)\n",
-        "x_test_trimmed_middle = select_middle(x_test, 300)"
-      ],
-      "metadata": {
-        "id": "iZ-qo8eRgk9Y"
-      },
-      "execution_count": 10,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Select middle 300:\n",
-        "# Now try with an embedding size of 64, 128, 192 \n",
-        "max_lengths = [64,128,192]\n",
-        "for length in max_lengths:\n",
-        "  run_roberta(length, roberta_tokenizer, str.format('roberta_model_300_trimmed_middle_{length}', length = length), \n",
-        "                                                    x_train = x_train_trimmed_middle,\n",
-        "                                                    y_train = y_train,\n",
-        "                                                    x_valid = x_valid_trimmed_middle,\n",
-        "                                                    y_valid = y_valid,\n",
-        "                                                    x_test = x_test_trimmed_middle,\n",
-        "                                                    y_test = y_test)"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 1000,
-          "referenced_widgets": [
-            "4f5effcf41474013a945079f28ba31c2",
-            "15b4e427bd5d4050ab0bdec9dbc6d019",
-            "01a250296169438f9e72dc13119b7238",
-            "76309ca6ee104cbdaab8d997e79c190e",
-            "e424adf2dcc74ada98b2d6f9c29b4cee",
-            "41731d8403ad4661a4766d4aab610f3b",
-            "fa15a217dfd349928fd2c8446121a1a4",
-            "bb60c6ad128940faa2839bb74dbc6ab5",
-            "90048e3a214346e88b63a02d0cafd97e",
-            "79fa68293d544160a988b133dbdcc798",
-            "05712e9c7e3943cb9e13690c6443d134"
-          ]
-        },
-        "id": "5p7w4DJ0JvmN",
-        "outputId": "84421579-4187-48a8-c8b5-e605c03742fe"
-      },
-      "execution_count": 11,
-      "outputs": [
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "This model will be saved as roberta_model_500_trimmed_middle_64\n",
-            "Running roBERTa for encoding max_length: 64\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 64)\n",
-            "Created encoding for validation data with shape (5893, 64)\n",
-            "Created encoding for test data with shape (5894, 64)\n"
-          ]
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "4f5effcf41474013a945079f28ba31c2",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "Downloading:   0%|          | 0.00/627M [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "1474/1474 [==============================] - 232s 150ms/step - loss: 0.6649 - accuracy: 0.6199 - val_loss: 0.6385 - val_accuracy: 0.6698\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 214s 145ms/step - loss: 0.6325 - accuracy: 0.6632 - val_loss: 0.6125 - val_accuracy: 0.6817\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 216s 147ms/step - loss: 0.6149 - accuracy: 0.6847 - val_loss: 0.5926 - val_accuracy: 0.7086\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 215s 146ms/step - loss: 0.6069 - accuracy: 0.6906 - val_loss: 0.5836 - val_accuracy: 0.7130\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 24s 127ms/step - loss: 0.5946 - accuracy: 0.6992\n",
-            "Test loss: 0.5945922136306763\n",
-            "Test accuracy: 0.6991856098175049\n",
-            "185/185 [==============================] - 26s 120ms/step\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.71      0.70      0.70      2992\n",
-            "           1       0.69      0.70      0.70      2902\n",
-            "\n",
-            "    accuracy                           0.70      5894\n",
-            "   macro avg       0.70      0.70      0.70      5894\n",
-            "weighted avg       0.70      0.70      0.70      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_middle_64/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_middle_64/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "This model will be saved as roberta_model_500_trimmed_middle_128\n",
-            "Running roBERTa for encoding max_length: 128\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 128)\n",
-            "Created encoding for validation data with shape (5893, 128)\n",
-            "Created encoding for test data with shape (5894, 128)\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_1/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "1474/1474 [==============================] - 371s 246ms/step - loss: 0.6390 - accuracy: 0.6591 - val_loss: 0.6084 - val_accuracy: 0.6952\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 359s 244ms/step - loss: 0.6077 - accuracy: 0.6954 - val_loss: 0.5854 - val_accuracy: 0.7098\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 359s 243ms/step - loss: 0.5978 - accuracy: 0.7004 - val_loss: 0.5748 - val_accuracy: 0.7144\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 359s 244ms/step - loss: 0.5938 - accuracy: 0.7041 - val_loss: 0.5687 - val_accuracy: 0.7180\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 38s 203ms/step - loss: 0.5786 - accuracy: 0.7148\n",
-            "Test loss: 0.5785771012306213\n",
-            "Test accuracy: 0.7147946953773499\n",
-            "185/185 [==============================] - 41s 199ms/step\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.72      0.72      0.72      2992\n",
-            "           1       0.71      0.71      0.71      2902\n",
-            "\n",
-            "    accuracy                           0.71      5894\n",
-            "   macro avg       0.71      0.71      0.71      5894\n",
-            "weighted avg       0.71      0.71      0.71      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_middle_128/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_middle_128/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "This model will be saved as roberta_model_500_trimmed_middle_192\n",
-            "Running roBERTa for encoding max_length: 192\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 192)\n",
-            "Created encoding for validation data with shape (5893, 192)\n",
-            "Created encoding for test data with shape (5894, 192)\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_2/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "1474/1474 [==============================] - 533s 356ms/step - loss: 0.6385 - accuracy: 0.6627 - val_loss: 0.6068 - val_accuracy: 0.6978\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 519s 352ms/step - loss: 0.6081 - accuracy: 0.6936 - val_loss: 0.5859 - val_accuracy: 0.7059\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 519s 352ms/step - loss: 0.5991 - accuracy: 0.7001 - val_loss: 0.5752 - val_accuracy: 0.7151\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 518s 352ms/step - loss: 0.5934 - accuracy: 0.7056 - val_loss: 0.5683 - val_accuracy: 0.7180\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 54s 291ms/step - loss: 0.5784 - accuracy: 0.7139\n",
-            "Test loss: 0.5783989429473877\n",
-            "Test accuracy: 0.7139464020729065\n",
-            "185/185 [==============================] - 57s 287ms/step\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.72      0.72      0.72      2992\n",
-            "           1       0.71      0.70      0.71      2902\n",
-            "\n",
-            "    accuracy                           0.71      5894\n",
-            "   macro avg       0.71      0.71      0.71      5894\n",
-            "weighted avg       0.71      0.71      0.71      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_middle_192/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_middle_192/assets\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Now try with an embedding size of 64, 128, 192 \n",
-        "max_lengths = [64,128,192]\n",
-        "for length in max_lengths:\n",
-        "  run_roberta(length, roberta_tokenizer, str.format('roberta_model_500_trimmed_middle_{length}', length = length), \n",
-        "                                                    x_train = x_train_trimmed_middle,\n",
-        "                                                    y_train = y_train,\n",
-        "                                                    x_valid = x_valid_trimmed_middle,\n",
-        "                                                    y_valid = y_valid,\n",
-        "                                                    x_test = x_test_trimmed_middle,\n",
-        "                                                    y_test = y_test)"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "VHrGNxScq2mu",
-        "outputId": "b47ef003-6d25-40df-a674-fefc6f98b7fe"
-      },
-      "execution_count": 103,
-      "outputs": [
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "This model will be saved as roberta_model_500_trimmed_middle_64\n",
-            "Running roBERTa for encoding max_length: 64\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 64)\n",
-            "Created encoding for validation data with shape (5893, 64)\n",
-            "Created encoding for test data with shape (5894, 64)\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_4/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "1474/1474 [==============================] - 162s 106ms/step - loss: 0.6457 - accuracy: 0.6470 - val_loss: 0.6151 - val_accuracy: 0.6895\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 154s 104ms/step - loss: 0.6150 - accuracy: 0.6864 - val_loss: 0.5902 - val_accuracy: 0.7136\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 154s 105ms/step - loss: 0.6046 - accuracy: 0.6961 - val_loss: 0.5786 - val_accuracy: 0.7200\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 154s 104ms/step - loss: 0.5991 - accuracy: 0.6996 - val_loss: 0.5718 - val_accuracy: 0.7220\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 16s 88ms/step - loss: 0.5843 - accuracy: 0.7104\n",
-            "Test loss: 0.584299623966217\n",
-            "Test accuracy: 0.710383415222168\n",
-            "185/185 [==============================] - 19s 87ms/step\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.72      0.70      0.71      2992\n",
-            "           1       0.70      0.72      0.71      2902\n",
-            "\n",
-            "    accuracy                           0.71      5894\n",
-            "   macro avg       0.71      0.71      0.71      5894\n",
-            "weighted avg       0.71      0.71      0.71      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_middle_64/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_middle_64/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "This model will be saved as roberta_model_500_trimmed_middle_128\n",
-            "Running roBERTa for encoding max_length: 128\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 128)\n",
-            "Created encoding for validation data with shape (5893, 128)\n",
-            "Created encoding for test data with shape (5894, 128)\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_5/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "1474/1474 [==============================] - 295s 197ms/step - loss: 0.6237 - accuracy: 0.6860 - val_loss: 0.5900 - val_accuracy: 0.7246\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 287s 195ms/step - loss: 0.5874 - accuracy: 0.7225 - val_loss: 0.5634 - val_accuracy: 0.7483\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 287s 195ms/step - loss: 0.5739 - accuracy: 0.7326 - val_loss: 0.5501 - val_accuracy: 0.7519\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 287s 195ms/step - loss: 0.5655 - accuracy: 0.7374 - val_loss: 0.5399 - val_accuracy: 0.7577\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 30s 162ms/step - loss: 0.5499 - accuracy: 0.7501\n",
-            "Test loss: 0.5498960614204407\n",
-            "Test accuracy: 0.7500848174095154\n",
-            "185/185 [==============================] - 32s 159ms/step\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.76      0.75      0.75      2992\n",
-            "           1       0.74      0.75      0.75      2902\n",
-            "\n",
-            "    accuracy                           0.75      5894\n",
-            "   macro avg       0.75      0.75      0.75      5894\n",
-            "weighted avg       0.75      0.75      0.75      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_middle_128/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_middle_128/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "This model will be saved as roberta_model_500_trimmed_middle_192\n",
-            "Running roBERTa for encoding max_length: 192\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (47146, 192)\n",
-            "Created encoding for validation data with shape (5893, 192)\n",
-            "Created encoding for test data with shape (5894, 192)\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_6/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "1474/1474 [==============================] - 443s 297ms/step - loss: 0.6198 - accuracy: 0.6919 - val_loss: 0.5868 - val_accuracy: 0.7266\n",
-            "Epoch 2/4\n",
-            "1474/1474 [==============================] - 435s 295ms/step - loss: 0.5836 - accuracy: 0.7238 - val_loss: 0.5596 - val_accuracy: 0.7506\n",
-            "Epoch 3/4\n",
-            "1474/1474 [==============================] - 435s 295ms/step - loss: 0.5715 - accuracy: 0.7354 - val_loss: 0.5470 - val_accuracy: 0.7531\n",
-            "Epoch 4/4\n",
-            "1474/1474 [==============================] - 435s 295ms/step - loss: 0.5625 - accuracy: 0.7386 - val_loss: 0.5402 - val_accuracy: 0.7536\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 45s 243ms/step - loss: 0.5497 - accuracy: 0.7453\n",
-            "Test loss: 0.5497238636016846\n",
-            "Test accuracy: 0.745334267616272\n",
-            "185/185 [==============================] - 47s 239ms/step\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.74      0.77      0.75      2992\n",
-            "           1       0.75      0.72      0.74      2902\n",
-            "\n",
-            "    accuracy                           0.75      5894\n",
-            "   macro avg       0.75      0.74      0.75      5894\n",
-            "weighted avg       0.75      0.75      0.75      5894\n",
-            "\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_middle_192/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_middle_192/assets\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Load the model where we take the middle 500 characters and an embedding length of 128:\n",
-        "saved_model = create_roberta_model()\n",
-        "saved_model.load_weights('/content/drive/My Drive/models/Project W266/roberta_model_500_trimmed_middle_128')\n",
-        "\n",
-        "test_encodings_roberta = tokenize(128, x_test, roberta_tokenizer)\n",
-        "\n",
-        "print(f'Created encoding for test data with shape {test_encodings_roberta.input_ids.shape}')\n",
-        "\n",
-        "print('Evaluating model...')\n",
-        "score = saved_model.evaluate([test_encodings_roberta.input_ids, test_encodings_roberta.attention_mask], y_test)\n",
-        "\n",
-        "print(\"Test loss:\", score[0])\n",
-        "print(\"Test accuracy:\", score[1])\n",
-        "\n",
-        "predictions = saved_model.predict([test_encodings_roberta.input_ids, test_encodings_roberta.attention_mask])\n",
-        "preds = predictions.to_tuple()[0].argmax(1)\n",
-        "print('\\n Classification Report:\\n')\n",
-        "print(classification_report(y_test, preds))\n",
-        "\n",
-        "test_results = pd.DataFrame({'text': x_test, 'label': y_test, 'pred': preds})\n",
-        "test_results['text_length'] = [len(t) for t in test_results.text]\n",
-        "TP = test_results[(test_results.label == 1) & (test_results.pred == 1)]\n",
-        "FP = test_results[(test_results.label == 0) & (test_results.pred == 1)]\n",
-        "TN = test_results[(test_results.label == 0) & (test_results.pred == 0)]\n",
-        "FN = test_results[(test_results.label == 1) & (test_results.pred == 0)]\n",
-        "\n",
-        "print(f'TP size: {TP.shape[0]}')\n",
-        "print(f'FP size: {FP.shape[0]}')\n",
-        "print(f'TN size: {TN.shape[0]}')\n",
-        "print(f'FN size: {FN.shape[0]}')"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "8P_dpSToxasK",
-        "outputId": "029626c7-e318-4bab-b23f-0e9e52ffab31"
-      },
-      "execution_count": 105,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Detecting that an object or model or tf.train.Checkpoint is being deleted with unrestored values. See the following logs for the specific values in question. To silence these warnings, use `status.expect_partial()`. See https://www.tensorflow.org/api_docs/python/tf/train/Checkpoint#restorefor details about the status object returned by the restore function.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Detecting that an object or model or tf.train.Checkpoint is being deleted with unrestored values. See the following logs for the specific values in question. To silence these warnings, use `status.expect_partial()`. See https://www.tensorflow.org/api_docs/python/tf/train/Checkpoint#restorefor details about the status object returned by the restore function.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).keras_api.metrics.0.total\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).keras_api.metrics.0.total\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).keras_api.metrics.0.count\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).keras_api.metrics.0.count\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).keras_api.metrics.1.total\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).keras_api.metrics.1.total\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).keras_api.metrics.1.count\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root).keras_api.metrics.1.count\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Created encoding for test data with shape (5894, 128)\n",
-            "Evaluating model...\n",
-            "185/185 [==============================] - 34s 163ms/step - loss: 0.5621 - accuracy: 0.7391\n",
-            "Test loss: 0.5621094703674316\n",
-            "Test accuracy: 0.7390566468238831\n",
-            "185/185 [==============================] - 32s 160ms/step\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.72      0.79      0.75      2992\n",
-            "           1       0.76      0.69      0.72      2902\n",
-            "\n",
-            "    accuracy                           0.74      5894\n",
-            "   macro avg       0.74      0.74      0.74      5894\n",
-            "weighted avg       0.74      0.74      0.74      5894\n",
-            "\n",
-            "TP size: 1990\n",
-            "FP size: 626\n",
-            "TN size: 2366\n",
-            "FN size: 912\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Positive class analysis\n",
-        "fig, ax = plt.subplots(figsize=(8, 5));\n",
-        "ax.hist(TP.text_length, bins=40, color='g', alpha=0.6, label=\"True positives\")\n",
-        "ax.hist(FN.text_length, bins=40, color='r', alpha=0.6, label=\"False negatives\")\n",
-        "ax.set_xlabel(\"Length of Review\")\n",
-        "ax.set_ylabel(\"Number of Reviews\")\n",
-        "plt.legend()\n",
-        "plt.show()"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 334
-        },
-        "id": "KSkebzlMlRe0",
-        "outputId": "6d558850-bdfe-478d-87c3-258c4ff0974d"
-      },
-      "execution_count": 106,
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "<Figure size 576x360 with 1 Axes>"
-            ],
-            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfQAAAE9CAYAAAD9MZD2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de7hVVbn48e8rknDEvOIl0TAfjRRwh1s0KdN4lC6SVxR/nBLzgqfUzNKsfpWlnSxPN/15AktDT6QYedfS8qiYpQmIgHhDxcRMEI3UVG7v748193aLsFl7s9dem7m/n+dZz5pzzLnmfNfg2bxrjDnmHJGZSJKk9dsG9Q5AkiStOxO6JEklYEKXJKkETOiSJJWACV2SpBIwoUuSVAIb1juAdbHVVltl//796x2GJEmdZvr06S9kZt9Vy9frhN6/f3+mTZtW7zAkSeo0EfH06srtcpckqQRM6JIklYAJXZKkElivr6FLklZv2bJlLFiwgNdff73eoaidevXqRb9+/ejZs2dV+5vQJamEFixYwCabbEL//v2JiHqHozbKTBYvXsyCBQvYaaedqvqMXe6SVEKvv/46W265pcl8PRURbLnllm3qYTGhS1JJmczXb2399zOhS5I63OLFi2loaKChoYFtt92W7bffvnl96dKl9Q6v2bRp0zjttNMAuPPOO/nTn/7UvG38+PFcccUV9QqtzbyGLkndwLgbx3Xo8SaMnNDq9i233JKZM2cCcM4559CnTx++9KUvNW9fvnw5G25Y/xTU2NhIY2MjUEnoffr0Yd999wXg5JNPrmdobWYLXZLUKcaOHcvJJ5/M3nvvzVlnncU555zDf/3XfzVvHzhwIPPnzwfgl7/8JUOHDqWhoYFx48axYsWKtx2vf//+nHXWWQwaNIihQ4cyb948AObPn89HPvIRBg8ezPDhw/nrX/8KwK9//WsGDhzIHnvswX777QdUkvjBBx/M/PnzGT9+PD/60Y9oaGjg7rvvbo7vkUceYejQoc3nnT9/PoMGDQJg+vTpfPjDH2bPPfdkxIgRPPfccwBceOGF7LbbbgwePJjRo0d3fGWuhgldktRpFixYwJ/+9Cd++MMfrnGfhx9+mMmTJ3PPPfcwc+ZMevTowaRJk1a776abbsrs2bM55ZRTOP300wE49dRTOfbYY5k1axZjxoxp7lL/9re/za233sqDDz7IDTfc8Jbj9O/fn5NPPpkvfOELzJw5kw996EPN2wYMGMDSpUt56qmnAJg8eTJHH300y5Yt49RTT2XKlClMnz6dz3zmM3zta18D4Pzzz+eBBx5g1qxZjB8/vv0V1gb17+/QOqm2G21t3WOS1BlGjRpFjx49Wt3n9ttvZ/r06ey1114AvPbaa2y99dar3feYY45pfv/CF74AwJ///GeuueYaAD71qU9x1llnATBs2DDGjh3LUUcdxeGHH96muI866igmT57M2WefzeTJk5k8eTKPPvooc+bM4cADDwRgxYoVbLfddgAMHjyYMWPGcOihh3LooYe26VztZUKXJHWajTfeuHl5ww03ZOXKlc3rTbdoZSbHHnss3/3ud9d6vJYjwdc2Knz8+PHcd9993Hzzzey5555Mnz696riPPvpoRo0axeGHH05EsMsuuzB79mx23313/vznP79t/5tvvpmpU6dy44038p3vfIfZs2fXfMyAXe6SpLro378/M2bMAGDGjBnNXdrDhw9nypQpLFy4EIAXX3yRp59e7QRjTJ48ufn9Ax/4AAD77rsvV111FQCTJk1q7j5/4okn2Hvvvfn2t79N3759eeaZZ95yrE022YSXX355tefZeeed6dGjB+eeey5HH300AO9973tZtGhRc0JftmwZDz30ECtXruSZZ57hgAMO4Hvf+x5LlizhlVdeaV8ltYEtdElSXRxxxBFcccUV7L777uy9997suuuuAOy2226cd
955HHTQQaxcuZKePXty8cUX8+53v/ttx3jppZcYPHgwG220EVdeeSUAF110EccddxwXXHABffv25Re/+AUAZ555Jo8//jiZyfDhw9ljjz246667mo81cuRIjjzySK6//nouuuiit53r6KOP5swzz2z+4fGOd7yDKVOmcNppp7FkyRKWL1/O6aefzq677sq///u/s2TJEjKT0047jc0226zD629VkZk1P0mtNDY2ZnefD91r6JJW5+GHH+Z973tfvcOoqf79+zNt2jS22mqreodSM6v7d4yI6ZnZuOq+drlLklQCdrlLktZLTfesq8IWuiRJJWBClySpBEzokiSVgAldkqQSMKFLkmqiR48ezVOmNjQ0tDqIrU+fPp0X2DqYOHEif/vb35rXTzjhBObOnVvHiN7kKHdJ6g7Gdez0qUxY+7Mtevfu3TyFallMnDiRgQMH8q53vQuAn//853WO6E220CVJneKVV15h+PDhDBkyhEGDBnH99de/bZ/nnnuO/fbbj4aGBgYOHMjdd98NwG233cYHPvABhgwZwqhRo1b7KNX999+fL3/5ywwdOpRdd921+bMrVqzgzDPPZK+99mLw4MFMKH6MrFy5ks9+9rMMGDCAAw88kI9//ONMmTIFqMzMttdeezFw4EBOOukkMpMpU6Ywbdo0xowZQ0NDA6+99hr7778/06ZNY/z48Zx55pnNsUycOJFTTjkFWP1UsCtWrGDs2LEMHDiQQYMG8aMf/Wid69eELkmqiddee625u/2www6jV69eXHvttcyYMYM77riDL37xi6z6tNJf/epXjBgxgpkzZ/Lggw/S0NDACy+8wHnnnccf/vAHZsyYQWNj4xqnX12+fDl/+ctf+PGPf8y3vvUtAC699FI23XRT7r//fu6//35+9rOf8dRTT3HNNdcwf/585s6dy//8z/+8ZZKVU045hfvvv585c+bw2muvcdNNN3HkkUfS2NjIpEmTmDlzJr17927e/4gjjuDaa69tXp88eTKjR49e41SwM2fO5Nlnn2XOnDnMnj2b4447bp3ru2Zd7hGxA3AFsA2QwCWZ+ZOI2AKYDPQH5gNHZeZLUZkm5yfAx4F/AWMzc0at4pMk1daqXe7Lli3jq1/9KlOnTmWDDTbg2Wef5fnnn2fbbbdt3mevvfbiM5/5DMuWLePQQw+loaGBu+66i7lz5zJs2DAAli5d2jwRy6qapkXdc889m6/Z33bbbcyaNau59b1kyRIef/xx/vjHPzJq1Cg22GADtt12Ww444IDm49xxxx18//vf51//+hcvvvgiu+++OyNHjlzjd+3bty/vec97uPfee9lll1145JFHGDZsGBdffPFqp4IdOXIkTz75JKeeeiqf+MQnOOigg9pRw29Vy2voy4EvZuaMiNgEmB4RvwfGArdn5vkRcTZwNvBl4GPALsVrb+CnxbskqQQmTZrEokWLmD59Oj179qR///7NU6Y22W+//Zg6dSo333wzY8eO5YwzzmDzzTfnwAMPbJ58pTUbbbQRUBmQt3z5cqAyHetFF13EiBEj3rLvLbfcstpjvP7663z2s59l2rRp7LDDDpxzzjlvi3N1Ro8ezdVXX82AAQM47LDDiIhWp4J98MEHufXWWxk/fjxXX301l1122VrP0Zqadbln5nNNLezMfBl4GNgeOAS4vNjtcqBp5vdDgCuy4l5gs4jYrlbxSZI615IlS9h6663p2bMnd9xxx2qnRH366afZZpttOPHEEznhhBOYMWMG++yzD/fccw/z5s0D4NVXX+Wxxx6r+rwjRozgpz/9KcuWLQPgscce49VXX2XYsGH85je/YeXKlTz//PPceeedwJvzsm+11Va88sorzS17aH2K1cMOO4zrr7+eK6+8ktGjRwNrngr2hRdeYOXKlRxxxBGcd955zdPIrotOGeUeEf2B9wP3Adtk5nPFpr9T6ZKHSrJvOTntgqLsOSRJ670xY8YwcuRIBg0aRGNjIwMGDHjbP
nfeeScXXHABPXv2pE+fPlxxxRX07duXiRMncswxx/DGG28AcN555zVPt7o2J5xwAvPnz2fIkCFkJn379uW6667jiCOO4Pbbb2e33XZjhx12YMiQIWy66aZsttlmnHjiiQwcOJBtt922ubscYOzYsZx88sn07t37LdfcATbffHPe9773MXfuXIYOHQqseSrY3r17c9xxx7Fy5UqA1bbg26rm06dGRB/gLuA7mXlNRPwjMzdrsf2lzNw8Im4Czs/MPxbltwNfzsxpqxzvJOAkgB133HHPNU163104faqk1ekO06d2hFdeeYU+ffqwePFihg4dyj333POWa/r11pbpU2vaQo+InsBvgEmZeU1R/HxEbJeZzxVd6guL8meBHVp8vF9R9haZeQlwCVTmQ69Z8JKk0jv44IP5xz/+wdKlS/n617/epZJ5W9VylHsAlwIPZ2bL+wtuAI4Fzi/er29RfkpEXEVlMNySFl3zkiR1uKbr5mVQyxb6MOBTwOyIaLpv4atUEvnVEXE88DRwVLHtFiq3rM2jctvaut+UpzazC1+S1k81S+jFtfBYw+bhq9k/gc/VKh5J6m4yk0pnqdZHbR3j5pPiJKmEevXqxeLFi9ucFNQ1ZCaLFy+mV69eVX/GyVkkqYT69evHggULWLRoUb1DUTv16tWLfv36Vb2/CV2SSqhnz57stNNO9Q5Dncgud0mSSsCELklSCZjQJUkqARO6JEklYEKXJKkETOiSJJWAt611E9U+0lWStH6yhS5JUgnYQle7OImLJHUtttAlSSoBE7okSSVgQpckqQRM6JIklYAJXZKkEjChS5JUAiZ0SZJKwIQuSVIJmNAlSSoBE7okSSVgQpckqQRM6JIklYAJXZKkEjChS5JUAiZ0SZJKwIQuSVIJmNAlSSoBE7okSSVgQpckqQRM6JIklYAJXZKkEjChS5JUAiZ0SZJKwIQuSVIJmNAlSSoBE7okSSVgQpckqQRM6JIklYAJXZKkEjChS5JUAiZ0SZJKwIQuSVIJmNAlSSoBE7okSSVgQpckqQRM6JIklYAJXZKkEjChS5JUAiZ0SZJKoE0JPSI2iIh3VrnvZRGxMCLmtCg7JyKejYiZxevjLbZ9JSLmRcSjETGiLXFJktTdrTWhR8SvIuKdEbExMAeYGxFnVnHsicBHV1P+o8xsKF63FOfYDRgN7F585r8joke1X0KSpO6umhb6bpn5T+BQ4LfATsCn1vahzJwKvFhlHIcAV2XmG5n5FDAPGFrlZyVJ6vaqSeg9I6InlYR+Q2YuA3IdznlKRMwquuQ3L8q2B55psc+CokySJFWhmoQ+AZgPbAxMjYh3A/9s5/l+CuwMNADPAT9o6wEi4qSImBYR0xYtWtTOMCRJKpcN17ZDZl4IXNi0HhF/BQ5oz8ky8/kWx/kZcFOx+iywQ4td+xVlqzvGJcAlAI2NjevSU6BOMO7GcVXtN2HkhBpHIknlVs2guCciYlJEnBwRu2fF8vacLCK2a7F6GJVBdgA3AKMjYqOI2AnYBfhLe84hSVJ3tNYWOrAbsDfwIeCCiHgvMCszD2vtQxFxJbA/sFVELAC+CewfEQ1UrsHPB8YBZOZDEXE1MBdYDnwuM1e06xtJktQNVZPQVwDLiveVwMLi1arMPGY1xZe2sv93gO9UEY8kSVpFNQn9n8Bs4IfAzzJzcW1DkiRJbVXNKPdjgKnAZ4GrIuJbETG8tmFJkqS2qGaU+/XA9RExAPgYcDpwFtC7xrFJkqQqVTPK/TcRMQ/4CfBvwKeBzVv/lCRJ6kzVXEP/LvCAo84lSeq6qrmGPhf4SkRcAhARu0TEwbUNS5IktUU1Cf0XwFJg32L9WeC8mkUkSZLarJqEvnNmfp/Kvehk5r+AqGlUkiSpTapJ6EsjojfFDGsRsTPwRk2jkiRJbVLNoLhvAr8DdoiIScAwYGwtg5IkSW1TzX3ov4+IGcA+VLraP5+ZL9Q8MkmSVLU1drkXD5IhIoYA76Yyf
/nfgB2LMkmS1EW01kI/AzgJ+MFqtiXwkZpEJEmS2myNCT0zTyreD+i8cCRJUntU8+jXWRHxlWJ0uyRJ6oKquW1tJJW50K+OiPsj4ksRsWON45IkSW2w1oSemU9n5vczc0/g/wCDgadqHpkkSapaNfehExHvBo4uXiuoTJ8qSZK6iLUm9Ii4D+gJ/BoYlZlP1jyqshk3bs3bJkzovDgkSaVVTQv905n5aM0j6a5M9pKkDlDNoLh/RMSlEfFbgIjYLSKOr3FckiSpDapJ6BOBW4F3FeuPAafXKiBJktR21ST0rTLzamAlQGYupzIwTpIkdRHVJPRXI2JL3pw+dR9gSU2jkiRJbVLNoLgzgBuAnSPiHqAvcGRNo5IkSW1SzfSpMyLiw8B7qUyf+igwtNaBSZKk6q0xoUdED+AoYHvgt5n5UEQcDFwC9Abe3zkhSpKktWmthX4psAPwF+CiiPgbsCfwlcy8rjOCkyRJ1WktoTcCgzNzZUT0Av4O7JyZizsnNEmSVK3WRrkvzcymW9VeB540mUuS1DW11kIfEBGziuWgMsp9VrGcmTm45tFJkqSqtJbQ39dpUUiSpHWyxoSemU93ZiCSJKn9qnlSnCRJ6uJM6JIklcAaE3pE3F68f6/zwpEkSe3R2qC47SJiX+CTEXEVldHtzTJzRk0jW9+MG9ehh5v69FQm3dixx+zKxlX5XSeMnFDjSCRp/dRaQv8G8HWgH/DDVbYl8JFaBSVJktqmtVHuU4ApEfH1zDy3E2OSJEltVM1sa+dGxCeB/YqiOzPzptqGJUmS2mKto9wj4rvA54G5xevzEfGftQ5MkiRVb60tdOATQEPTc90j4nLgAeCrtQxMkiRVr9r70DdrsbxpLQKRJEntV00L/bvAAxFxB5Vb1/YDzq5pVJIkqU2qGRR3ZUTcCexVFH05M/9e06gkSVKbVNNCJzOfA26ocSySJKmdfJa7JEklYEKXJKkEWu1yj4gewEOZOaCT4lELYy6e2ur2SZ/br9XtkqTuo9UWemauAB6NiB07KR5JktQO1QyK2xx4KCL+ArzaVJiZn6xZVJIkqU2qSehfb8+BI+Iy4GBgYWYOLMq2ACYD/YH5wFGZ+VJEBPAT4OPAv4CxTs8qSVL11jooLjPvopJ8exbL9wPVJNuJwEdXKTsbuD0zdwFu580H1HwM2KV4nQT8tIrjS5Kkwlpb6BFxIpUkuwWwM7A9MB4Y3trnMnNqRPRfpfgQYP9i+XLgTuDLRfkVmZnAvRGxWURsV9z/LjUbd+O4qvabMHJCjSORpK6lmtvWPgcMA/4JkJmPA1u383zbtEjSfwe2KZa3B55psd+CokySJFWhmoT+RmYubVqJiA2BXNcTF63xNh8nIk6KiGkRMW3RokXrGoYkSaVQTUK/KyK+CvSOiAOBXwM3tvN8z0fEdgDF+8Ki/Flghxb79SvK3iYzL8nMxsxs7Nu3bzvDkCSpXKpJ6GcDi4DZwDjgFuD/tvN8NwDHFsvHAte3KP90VOwDLPH6uSRJ1atmtrWVEXE5cB+VLvJHi+7yVkXElVQGwG0VEQuAbwLnA1dHxPHA08BRxe63ULllbR6V29aOa/tXkSSp+6pmlPsnqIxqf4LKfOg7RcS4zPxta5/LzGPWsOlto+OLHwifW3u4kiRpdap5sMwPgAMycx5AROwM3Ay0mtAlSVLnqeYa+stNybzwJPByjeKRJEntsMYWekQcXixOi4hbgKupXEMfReVpcZIkqYtorct9ZIvl54EPF8uLgN41i0jqAD5RTlJ3s8aEnpmONJckaT1RzSj3nYBTqcyQ1ry/06dKktR1VDPK/TrgUipPh1tZ23AkSVJ7VJPQX8/MC2seiSRJardqEvpPIuKbwG3AG02FmVnNnOiSJKkTVJPQBwGfAj7Cm13uWaxLkqQuoJqEPgp4T8spVCVJUtdSzZPi5gCb1ToQSZLUftW00DcDHomI+3nrNfTud9vauOoeViJJUmerJqF/s+ZRS
JKkdVLNfOh3dUYgUj34iFhJZVHNk+JepjKqHeAdQE/g1cx8Zy0DkyRJ1aumhb5J03JEBHAIsE8tg5IkSW1TzSj3ZllxHTCiRvFIkqR2qKbL/fAWqxsAjcDrNYtIkiS1WTWj3FvOi74cmE+l212SJHUR1VxDd150SZK6uDUm9Ij4Riufy8w8twbxSJKkdmithf7qaso2Bo4HtgRM6JIkdRFrTOiZ+YOm5YjYBPg8cBxwFfCDNX1OkiR1vlavoUfEFsAZwBjgcmBIZr7UGYFJkqTqtXYN/QLgcOASYFBmvtJpUUmSpDZp7cEyXwTeBfxf4G8R8c/i9XJE/LNzwpMkSdVo7Rp6m54iJ0mS6sekLUlSCZjQJUkqARO6JEklYEKXJKkEqpmcRR1o6tNT6x2CJKmEbKFLklQCJnRJkkrAhC5JUgmY0CVJKgETuiRJJWBClySpBEzokiSVgAldkqQSMKFLklQCJnRJkkrAhC5JUgmY0CVJKgEnZ5E60Lgbx1W134SRE2ociaTuxoQuVaHaRC1J9WKXuyRJJWBClySpBEzokiSVgNfQpTpw8JykjlaXhB4R84GXgRXA8sxsjIgtgMlAf2A+cFRmvlSP+CRJWt/Us8v9gMxsyMzGYv1s4PbM3AW4vViXJElV6ErX0A8BLi+WLwcOrWMskiStV+qV0BO4LSKmR8RJRdk2mflcsfx3YJv6hCZJ0vqnXoPiPpiZz0bE1sDvI+KRlhszMyMiV/fB4gfASQA77rhj7SOVJGk9UJcWemY+W7wvBK4FhgLPR8R2AMX7wjV89pLMbMzMxr59+3ZWyJIkdWmdntAjYuOI2KRpGTgImAPcABxb7HYscH1nxyZJ0vqqHl3u2wDXRkTT+X+Vmb+LiPuBqyPieOBp4Kg6xCZJ0nqp0xN6Zj4J7LGa8sXA8M6OR5KkMuhKt61JkqR2MqFLklQCJnRJkkrAhC5JUgk425pUAs7eJskWuiRJJWBClySpBEzokiSVgAldkqQScFBcSY25eOoat0363H6dGIkkqTOY0PUW/hCQpPWTXe6SJJWACV2SpBKwy3091lr3uMqh2gfGSJItdEmSSsCELklSCZjQJUkqARO6JEkl4KC4bsjBdJJUPib0lsY5oliStH6yy12SpBIwoUuSVAJ2uUvdSLUPqpkwckKNI5HU0WyhS5JUArbQ1WGcqU2S6scWuiRJJWALXdLbeK1dWv+Y0NUp7I6XpNqyy12SpBIwoUuSVAImdEmSSsCELklSCTgoroNMfdoZzNT9OBpe6jpsoUuSVAImdEmSSsCELklSCZjQJUkqAQfFqWqtPe1NklRfJvS1cPR61+XjZCXpTSZ0dWn2CnQv1d4GB94Kp/rqirdsmtBVdybt8mtLoq6XrvgftNQWDoqTJKkEbKFLWi+tD61+qTOZ0FVKa+vGd9Cc2suueXVVdrlLklQCttAlaT1gz4DWxha6JEklYEKXJKkE7HJXt9Tee99bG0znk+vUHh09Wt+u+e7LhC61QXt/CKzLqPv2/lDwB4Y6gj8Q1h8mdKkL8Gl56i78gVA7XS6hR8RHgZ8APYCfZ+b5dQ5J6la8h7976G4P5ukOPyS6VEKPiB7AxcCBwALg/oi4ITPn1jcySU262viDrnppobslzI5WrwS8Pv+7dbVR7kOBeZn5ZGYuBa4CDqlzTJIkdXldqoUObA8802J9AbB3nWKRurxaDdKrha52znoMKOyqvQkqh8jMesfQLCKOBD6amScU658C9s7MU1rscxJwUrH6XuDRdTztVsAL63gMWY8dxXpcd9Zhx7AeO0Yt6vHdmdl31cKu1kJ/FtihxXq/oqxZZl4CXNJRJ4yIaZnZ2FHH666sx45hPa4767BjWI8dozPrsatdQ78f2CUidoqIdwCjgRvqHJMkSV1el2qhZ+byiDgFuJXKbWuXZeZDdQ5LkqQur0sldIDMvAW4pRNP2WHd992c9dgxrMd1Zx12DOuxY3RaPXapQXGSJKl9u
to1dEmS1A7dNqFHxEcj4tGImBcRZ9c7nq4mIi6LiIURMadF2RYR8fuIeLx437woj4i4sKjLWRExpMVnji32fzwijq3Hd6mniNghIu6IiLkR8VBEfL4oty7bICJ6RcRfIuLBoh6/VZTvFBH3FfU1uRhMS0RsVKzPK7b3b3GsrxTlj0bEiPp8o/qJiB4R8UBE3FSsW4ftEBHzI2J2RMyMiGlFWX3/rjOz272oDLh7AngP8A7gQWC3esfVlV7AfsAQYE6Lsu8DZxfLZwPfK5Y/DvwWCGAf4L6ifAvgyeJ982J583p/t06ux+2AIcXyJsBjwG7WZZvrMYA+xXJP4L6ifq4GRhfl44H/KJY/C4wvlkcDk4vl3Yq/942AnYr/B3rU+/t1cl2eAfwKuKlYtw7bV4/zga1WKavr33V3baH7iNm1yMypwIurFB8CXF4sXw4c2qL8iqy4F9gsIrYDRgC/z8wXM/Ml4PfAR2sffdeRmc9l5oxi+WXgYSpPRLQu26Coj1eK1Z7FK4GPAFOK8lXrsal+pwDDIyKK8qsy843MfAqYR+X/g24hIvoBnwB+XqwH1mFHquvfdXdN6Kt7xOz2dYplfbJNZj5XLP8d2KZYXlN9Ws8tFF2W76fSurQu26joKp4JLKTyH98TwD8yc3mxS8s6aa6vYvsSYEusxx8DZwEri/UtsQ7bK4HbImJ6VJ5gCnX+u+5yt61p/ZCZGRHeIlGliOgD/AY4PTP/WWnoVFiX1cnMFUBDRGwGXAsMqHNI65WIOBhYmJnTI2L/esdTAh/MzGcjYmvg9xHxSMuN9fi77q4t9LU+Ylar9XzRTUTxvrAoX1N9Ws9ARPSkkswnZeY1RbF12U6Z+Q/gDuADVLoumxomLeukub6K7ZsCi+ne9TgM+GREzKdymfEjwE+wDtslM58t3hdS+YE5lDr/XXfXhO4jZtvnBqBpFOaxwPUtyj9djOTcB1hSdDvdChwUEZsXoz0PKsq6jeKa46XAw5n5wxabrMs2iIi+RcuciOgNHEhlPMIdwJHFbqvWY1P9Hgn8b1ZGId0AjC5GcO8E7AL8pXO+RX1l5lcys19m9qfyf97/ZuYYrMM2i4iNI2KTpmUqf49zqPffdb1HCtbrRWXU4WNUrsN9rd7xdBb63ukAAARUSURBVLUXcCXwHLCMynWd46lcP7sdeBz4A7BFsW8AFxd1ORtobHGcz1AZNDMPOK7e36sO9fhBKtfaZgEzi9fHrcs21+Ng4IGiHucA3yjK30MlmcwDfg1sVJT3KtbnFdvf0+JYXyvq91HgY/X+bnWqz/15c5S7ddj2+nsPlZH+DwIPNeWQev9d+6Q4SZJKoLt2uUuSVComdEmSSsCELklSCZjQJUkqARO6JEklYEKXupCIeGXte63T8U+PiH/riPMV9yH/oZht6uhVtk2MiKeKbQ9GxPB2nqMxIi5sb4xSd+KjX6Xu5XTgl8C/OuBY7wfIzIY1bD8zM6dExAHAJVQeQNImmTkNmNb+EKXuwxa61MVFxM4R8btiEoi7I2JAUT6xmGP5TxHxZEQcWZRvEBH/HRGPFHMy3xIRR0bEacC7gDsi4o4Wx/9O0Yq+NyK2Wc35t4iI64p5nO+NiMHF86t/CexVtMJ3buUr/JliwoligpULIuL+4njjivKrIuITLc45sYh5/3hz3u6NI+KyqMyL/kBEHFKU3xwRg4vlByLiG8XytyPixPbXvLR+MaFLXd8lwKmZuSfwJeC/W2zbjsrT6A4Gzi/KDgf6U5m3+lNUnnlOZl4I/A04IDMPKPbdGLg3M/cApgKrS4DfAh7IzMHAV6lMA7kQOAG4OzMbMvOJVuL/KHBdsXw8lcde7gXsBZxYPD50MnAUQPE45uHAzasc52tUHj86FDgAuKB47ObdwIciYlNgOZVnlgN8qPhOUrdgl7vUhUVllrZ9gV/HmzO0bdRil+sycyUwt0Xr+oPAr4vyv7dsja/GUuCmYnk6lWekr+qDwBEAmfm/E
bFlRLyzivAviIj/pDLhxAeKsoOAwU29CVQm/NgF+C3wk4jYiMoPgKmZ+VqL79z02U9GxJeK9V7AjlQS+mnAU1R+BBxYjBPYKTMfrSJOqRRM6FLXtgGV+arXdJ36jRbLsYZ9WrMs33z+8wo69v+EpmvopwKXAXtSifHUzHzbBBQRcScwAjiaymxgb9sFOGLVJF206BuBJ6nMk74VlZ6G6R33VaSuzy53qQvLzH8CT0XEKKjM3hYRe6zlY/cARxTX0rehMhFHk5eBTdoYxt3AmOL8+wMvFHFV6/8BG0TECCozSf1HVKaUJSJ2LbrNodLtfhyVrvLfreY4twKnFjPYERFNg/KWAs8Ao6hcr7+byqUJu9vVrZjQpa7l3yJiQYvXGVSS6fER0TSz0yFrOcZvqMyQN5fKwLUZwJJi2yXA79bSDb+qc4A9I2IWlev0x7a++1sVPQDnAWcBPy/imhERc4AJvNkrcBvwYeAPRZJe1blAT2BWRDxUrDe5G1iYma8Vy/2Kd6nbcLY1qYQiok9mvhIRW1KZ+nJYZv693nFJqh2voUvldFNEbAa8AzjXZC6Vny10SZJKwGvokiSVgAldkqQSMKFLklQCJnRJkkrAhC5JUgmY0CVJKoH/D8Bp4Dyk/pZmAAAAAElFTkSuQmCC\n"
-          },
-          "metadata": {
-            "needs_background": "light"
-          }
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Negative class analysis\n",
-        "fig, ax = plt.subplots(figsize=(8, 5));\n",
-        "ax.hist(TN.text_length, bins=40, color='g', alpha=0.6, label=\"True negatives\")\n",
-        "ax.hist(FP.text_length, bins=40, color='r', alpha=0.6, label=\"False postives\")\n",
-        "ax.set_xlabel(\"Length of Review\")\n",
-        "ax.set_ylabel(\"Number of Reviews\")\n",
-        "plt.legend()\n",
-        "plt.show()"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 334
-        },
-        "id": "tqKIWfGOlYQK",
-        "outputId": "bc20b272-34ba-4765-ef78-727dd80ce78d"
-      },
-      "execution_count": 107,
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "<Figure size 576x360 with 1 Axes>"
-            ],
-            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfQAAAE9CAYAAAD9MZD2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de7iVZZn48e8NongKEZFUVKhhMg8IulEcRkdlPGueQjJLPBRM+SvNMrGpdCYrZ3RKbdRkyhF/aUqYSobmIRB1fh4AEQ018TSCB5BSBDJB7t8f693bLW72XhtYe2/e/f1c17rW+z7vu551r2df177X+7zPep7ITCRJ0vqtS3sHIEmS1p4JXZKkEjChS5JUAiZ0SZJKwIQuSVIJmNAlSSqBDdo7gLWx1VZbZb9+/do7DEmS2sSMGTPeyMzeTR1brxN6v379mD59enuHIUlSm4iIl1Z3zC53SZJKwIQuSVIJmNAlSSqB9foeuiSpOsuXL2fevHm888477R2KqtC9e3f69u1Lt27dqn6NCV2SOoF58+ax+eab069fPyKivcNRMzKTRYsWMW/ePPr371/16+xyl6RO4J133qFXr14m8/VARNCrV69W96aY0CWpkzCZrz/W5G9lQpck1dyiRYsYNGgQgwYN4qMf/Sjbbbddw/67777b3uFV7dJLL2XZsmUN+4cffjhvvvlmO0b0Pu+hS1InNOY3Y9ZpfVcfdXWzx3v16sWsWbMAuOCCC9hss834xje+0XB8xYoVbLBBx09Jl156KZ/73OfYZJNNAJg8eXI7R/Q+r9AlSe3ilFNO4Z/+6Z/Ye++9+eY3v8kFF1zAJZdc0nB811135cUXXwTgF7/4BXvttReDBg1izJgxvPfeex+qr1+/fpx//vnsscce7Lbbbjz99NMALF26lNNOO4299tqLwYMHc9tttwGwbNkyTjjhBHbeeWeOPfZY9t5774bZR7/0pS9RV1fHLrvswvnnnw/A5ZdfziuvvMIBBxzAAQcc0PCeb7zxBmPHjuWKK65oiKXxZ7n44osZMmQIAwcObKhr6dKlHHHEEey+++7suuuu3HTTTWvdniZ0SVK7mTdvHv/zP//Dj370o9We89RTT3HTTTfx4IMPMmvWLLp27cr111/f5LlbbbUVM2fO5Etf+lJDQv3+97/PgQceyCOPPMKUKVM455xzWLp0KVdeeSU9e/Zkzpw5fO9732PGjBkN9Xz/+99n+vTpzJ49m/vuu4/Zs2fz1a9+lW233ZYpU6YwZcqUD7zvyJEjmTBhQsP+hAkTGDlyJHfddRfPPvssjzzyCLNmzWLGjBlMmzaNO++8k2233ZbHH3+cJ598kkMPPXRtmhGwy32NVdtd1VI3lCR1ZiNGjKBr167NnnPvvfcyY8YMhgwZAsBf/vIXtt566ybPPe644wDYc889+fWvfw3AXXfdxaRJkxoS/DvvvMP//u//8sADD3DmmWcCld6AgQMHNtQzYcIExo0bx4oVK3j11VeZM2fOB46vavDgwSxYsIBXXnmFhQsX0rNnT7bffnsuu+wy7rrrLgYPHgzAkiVLePbZZ9l33335+te/zrnnnsuRRx7JvvvuW01zNcuELklqN5tuumnD9gYbbMDKlSsb9ut/tpWZjBo1ih/+8Ict1rfRRhsB0LVrV1asWNHw+ptvvplPfOITVcX0wgsvcMkll/Doo4/Ss2dPTjnllKp+QjZixAgmTpzIa6+9xsiRIxve+7zzzmPMmA9fBM6cOZPJkyfz7W9/m+HDh/Pd7363qvhWxy53SVKH0K9fP2bOnAlUkt0LL7wAwPDhw5k4cSILFiwA4E9/+hMvvbTaRcc+5JBDDuEnP/kJmQnAY489BsCwYcMausnnzJnDE088AcDixYvZdNNN6dGjB6+//jp33HFHQ12bb745b7/9dpPvM3LkSG688UYmTpzIiBEjGt77m
muuYcmSJQDMnz+/4Up+k0024XOf+xznnHNOw+deG16hS5I6hOOPP57rrruOXXbZhb333pu//du/BWDnnXfmwgsv5OCDD2blypV069aNK664gh133LGqer/zne9w1llnMXDgQFauXEn//v25/fbb+fKXv8yoUaPYeeed2Wmnndhll13o0aMHAwYMYPDgwey0005sv/32DBs2rKGu0aNHc+ihhzbcS29sl1124e2332a77bZjm222AeDggw/mqaeeYp999gFgs8024xe/+AVz587lnHPOoUuXLnTr1o2rrrpqrdsv6r+xrGsR8Qmg8bC9jwHfBa4ryvsBLwInZOafo/Ir+suAw4FlwCmZ2exXlrq6umyv9dC9hy5pffLUU0/xyU9+sr3D6FDee+89li9fTvfu3Xnuuef4x3/8R5555hk23HDD9g4NaPpvFhEzMrOuqfNrdoWemc8Ag4oAugLzgVuAscC9mXlRRIwt9s8FDgMGFI+9gauKZ0mS1rlly5ZxwAEHsHz5cjKTK6+8ssMk8zXRVl3uw4HnMvOliDga2L8oHw9MpZLQjwauy0qXwUMRsUVEbJOZr7ZRjJKkTmTzzTenvXp5a6GtBsV9Bvhlsd2nUZJ+DehTbG8HvNzoNfOKMkmS1IKaJ/SI2BD4FPCrVY8VV+OtuokfEaMjYnpETF+4cOE6ilKSpPVbW1yhHwbMzMzXi/3XI2IbgOJ5QVE+H9i+0ev6FmUfkJnjMrMuM+t69+5dw7AlSVp/tEVCP5H3u9sBJgGjiu1RwG2Nyk+OiqHAW94/lySpOjVN6BGxKXAQ8OtGxRcBB0XEs8A/FvsAk4HngbnAfwFfrmVskqS21bVr14YlUwcNGtSw8EpTNttss7YLrBkvvvgiN9xwQ8P+9OnT+epXv9qOEa1eTUe5Z+ZSoNcqZYuojHpf9dwEzqhlPJKkQhNTka6Vq1uec2PjjTduWEJ1fVGf0D/72c8CUFdXR11dkz8Db3dO/SpJahdLlixh+PDhDcud1i9r2tirr77Kfvvtx6BBg9h11125//77gcqCK/vssw977LEHI0aMaJhatbH999+fM888s+G1jzzyCFCZOvaYY45h4MCBDB06lNmzZwNw3333NfQeDB48mLfffpuxY8dy//33M2jQIH784x8zdepUjjzySFauXEm/fv148803G95vwIABvP766yxcuJDjjz+eIUOGMGTIEB588MHV1r8umdAlSW3iL3/5S0NCO/bYY+nevTu33HILM2fOZMqUKXz9619n1dlLb7jhBg455BBmzZrF448/zqBBg3jjjTe48MILueeee5g5cyZ1dXWrXX512bJlzJo1iyuvvJLTTjsNgPPPP5/Bgwcze/ZsfvCDH3DyyScDcMkll3DFFVcwa9Ys7r//fjbeeGMuuugi9t13X2bNmsXXvva1hnq7dOnC0UcfzS233ALAww8/zI477kifPn0488wz+drXvsajjz7KzTffzBe+8IXV1r8uOZe7JKlNrNrlvnz5cr71rW8xbdo0unTpwvz583n99df56Ec/2nDOkCFDOO2001i+fDnHHHMMgwYN4r777mPOnDkNc6y/++67DXOlr+rEE08EYL/99mPx4sW8+eabPPDAA9x8880AHHjggSxatIjFixczbNgwzj77bE466SSOO+44+vbt2+znGTlyJP/6r//Kqaeeyo033tiwwto999zDnDlzGs5bvHgxS5YsaXX9rWVClyS1i+uvv56FCxcyY8YMunXrRr9+/T60TOl+++3HtGnT+O1vf8spp5zC2WefTc+ePTnooIP45S9/uZqa31dZJmT1+42NHTuWI444gsmTJzNs2DB+97vfNVv3Pvvsw9y5c1m4cCG33nor3/72twFYuXIlDz30EN27d2+x/p122qnFz1Atu9wlSe3irbfeYuutt6Zbt25MmTKlySVRX3rpJfr06cMXv/hFvvCFLzBz5kyGDh3Kgw8+yNy5cwFYunQpf/zjH5t8j
5tuqqwR9sADD9CjRw969OjBvvvuy/XXXw/A1KlT2WqrrfjIRz7Cc889x2677ca5557LkCFDePrpp5tdLjUiOPbYYzn77LP55Cc/Sa9elTHgBx98MD/5yU8azqvvlWiq/nXJK3RJUrs46aSTOOqoo9htt92oq6tr8mp16tSpXHzxxXTr1o3NNtuM6667jt69e3Pttddy4okn8te//hWACy+8sGG51ca6d+/O4MGDWb58Oddccw0AF1xwAaeddhoDBw5kk002Yfz48QBceumlTJkyhS5durDLLrtw2GGH0aVLF7p27cruu+/OKaecwuDBgz9Q/8iRIxkyZAjXXnttQ9nll1/OGWecwcCBA1mxYgX77bcfP/3pT5usf12q2fKpbcHlUyWpOp1x+dT999+fSy65pMP+zKwlrV0+1S53SZJKwC53SVIpTZ06tb1DaFNeoUuSVAImdEnqJNbnMVOdzZr8rUzoktQJdO/enUWLFpnU1wOZyaJFiz70O/aWeA9dkjqBvn37Mm/ePBYuXNjeoagK3bt3b/VMciZ0SeoEunXrRv/+/ds7DNWQXe6SJJWACV2SpBIwoUuSVAImdEmSSsCELklSCZjQJUkqARO6JEklYEKXJKkETOiSJJWACV2SpBIwoUuSVAImdEmSSsCELklSCdQ0oUfEFhExMSKejoinImKfiNgyIu6OiGeL557FuRERl0fE3IiYHRF71DI2SZLKpNZX6JcBd2bmTsDuwFPAWODezBwA3FvsAxwGDCgeo4GrahybJEmlUbOEHhE9gP2AnwNk5ruZ+SZwNDC+OG08cEyxfTRwXVY8BGwREdvUKj5Jksqkllfo/YGFwH9HxGMR8bOI2BTok5mvFue8BvQptrcDXm70+nlFmSRJakEtE/oGwB7AVZk5GFjK+93rAGRmAtmaSiNidERMj4jpCxcuXGfBSpK0PqtlQp8HzMvMh4v9iVQS/Ov1XenF84Li+Hxg+0av71uUfUBmjsvMusys6927d82ClyRpfVKzhJ6ZrwEvR8QniqLhwBxgEjCqKBsF3FZsTwJOLka7DwXeatQ1L0mSmrFBjev/CnB9RGwIPA+cSuVLxISIOB14CTihOHcycDgwF1hWnCtJkqpQ04SembOAuiYODW/i3ATOqGU8kiSVlTPFSZJUAiZ0SZJKwIQuSVIJmNAlSSoBE7okSSVgQpckqQRM6JIklYAJXZKkEjChS5JUAiZ0SZJKwIQuSVIJmNAlSSoBE7okSSVgQpckqQRM6JIklYAJXZKkEjChS5JUAiZ0SZJKwIQuSVIJmNAlSSoBE7okSSVgQpckqQRM6JIklYAJXZKkEjChS5JUAiZ0SZJKYIPWnBwRXYDNMnNxjeIpnTG/GVPVeVcfdXWNI5EklVmLV+gRcUNEfCQiNgWeBOZExDnVVB4RL0bEExExKyKmF2VbRsTdEfFs8dyzKI+IuDwi5kbE7IjYY20+mCRJnUk1Xe47F1fkxwB3AP2Bz7fiPQ7IzEGZWVfsjwXuzcwBwL3FPsBhwIDiMRq4qhXvIUlSp1ZNQu8WEd2oJPRJmbkcyLV4z6OB8cX2+KLe+vLrsuIhYIuI2GYt3keSpE6jmoR+NfAisCkwLSJ2BKq9h57AXRExIyJGF2V9MvPVYvs1oE+xvR3wcqPXzivKJElSC1ocFJeZlwOX1+9HxP8CB1RZ/99n5vyI2Bq4OyKeXqXujIhWXe0XXwxGA+ywww6teWmLqh3AJklSR1PNoLjnIuL6iPiniNil6BJfUU3lmTm/eF4A3ALsBbxe35VePC8oTp8PbN/o5X2LslXrHJeZdZlZ17t372rCkCSp9KoaFEel270XcHGR4G9p6UURsWlEbF6/DRxMZZT8JGBUcdoo4LZiexJwcjHafSjwVqOueUmS1Ixqfof+HrC8eF5J5Yp6QbOvqOgD3BIR9e9zQ2beGRGPAhMi4nTgJeCE4vzJwOHAXGAZcGorPockSZ1aNQl9MfAE8CPgvzJzU
TUVZ+bzwO5NlC8ChjdRnsAZ1dQtSZI+qJou9xOBacCXgRsj4l8i4kMJWZIktZ9qRrnfBtwWETtRmfzlLOCbwMY1jk2SJFWpmlHuN0fEXOAyYBPgZKBnrQOTJEnVq+Ye+g+BxzLzvVoHI0mS1kw199DnAOdFxDiAiBgQEUfWNixJktQa1ST0/wbeBf6u2J8PXFiziCRJUqtVk9A/npn/TuW36GTmMiBqGpUkSWqVahL6uxGxMcUKaxHxceCvNY1KkiS1SjWD4s4H7gS2j4jrgWHAKbUMSpIktU41v0O/OyJmAkOpdLWfmZlv1DwySZJUtdV2uRcTyRARewA7Aq8CrwA7FGWSJKmDaO4K/Wwq647/RxPHEjiwJhFJkqRWW21Cz8zRxfMBbReOJElaE9VM/To7Is4rRrdLkqQOqJqfrR1FZS30CRHxaER8IyJ2qHFckiSpFVpM6Jn5Umb+e2buCXwWGAi8UPPIJElS1ar5HToRsSMwsni8R2X5VEmS1EG0mNAj4mGgG/ArYERmPl/zqCRJUqtUc4V+cmY+U/NIJEnSGqtmUNybEfHziLgDICJ2jojTaxyXJElqhWoS+rXA74Bti/0/AmfVKiBJktR61ST0rTJzArASIDNXUBkYJ0mSOohqEvrSiOjF+8unDgXeqmlUkiSpVaoZFHc2MAn4eEQ8CPQGPl3TqCRJUqtUs3zqzIj4B+ATVJZPfQbYq9aBSZKk6q02oUdEV+AEYDvgjsz8Q0QcCYwDNgYGt02IkiSpJc1dof8c2B54BPhJRLwC7Amcl5m3tkVwkiSpOs0l9DpgYGaujIjuwGvAxzNzUWveoLjSnw7Mz8wjI6I/cCPQC5gBfD4z342IjYDrqHxpWASMzMwXW/2JJEnqhJob5f5uZtb/VO0d4PnWJvPCmcBTjfb/DfhxZv4N8GegfpKa04E/F+U/Ls6TJElVaC6h71SshT47Ip5otP9ERMyupvKI6AscAfys2A/gQGBiccp44Jhi++hin+L48OJ8SZLUgua63D+5Duq/lMrKbJsX+72AN4vJaQDmURl0R/H8MlQmr4mIt4rz31gHcUiSVGqrTeiZ+dLaVFyMiF+QmTMiYv+1qWuVekcDowF22GGHdVWtJEnrtWpmiltTw4BPRcSLVAbBHQhcBmwREfVfJPoC84vt+VRG1VMc70FlcNwHZOa4zKzLzLrevXvXMHxJktYfNUvomXleZvbNzH7AZ4DfZ+ZJwBTen2luFHBbsT2p2Kc4/vvMzFrFJ0lSmaw2oUfEvcXzuh5tfi5wdkTMpXKP/OdF+c+BXkX52cDYdfy+kiSVVnOD4raJiL+j0m1+I5VpXxtk5sxq3yQzpwJTi+3naWLq2OKncSOqrVOSJL2vuYT+XeA7VO5z/2iVY0nlnrgkSeoAmhvlPhGYGBHfyczvtWFMkiSplapZbe17EfEpYL+iaGpm3l7bsCRJUmu0OMo9In5IZfrWOcXjzIj4Qa0DkyRJ1WvxCp3K1K2D6ud1j4jxwGPAt2oZmCRJql61v0PfotF2j1oEIkmS1lw1V+g/BB6LiClUfrq2H/5GXJKkDqWaQXG/jIipwJCi6NzMfK2mUUmSpFap5gqdzHyVytSskiSpA6rl4iySJKmNmNAlSSqBZhN6RHSNiKfbKhhJkrRmmk3omfke8ExE7NBG8UiSpDVQzaC4nsAfIuIRYGl9YWZ+qmZRSZKkVqkmoX+n5lFIkqS1Us3v0O+LiB2BAZl5T0RsAnStfWiSJKla1SzO8kVgInB1UbQdcGstg5IkSa1Tzc/WzgCGAYsBMvNZYOtaBiVJklqnmoT+18x8t34nIjYAsnYhSZKk1qomod8XEd8CNo6Ig4BfAb+pbViSJKk1qknoY4GFwBPAGGAy8O1aBiVJklqnmlHuKyNiPPAwla72ZzLTLndJkjqQFhN6RBwB/BR4jsp66P0jYkxm3lHr4CRJUnWqmVjmP4ADMnMuQER8HPgtYEKXJKmDqOYe+tv1y
bzwPPB2jeKRJElrYLVX6BFxXLE5PSImAxOo3EMfATzaBrFJkqQqNdflflSj7deBfyi2FwIb1ywiSZLUaqtN6Jl56tpUHBHdgWnARsX7TMzM8yOiP3Aj0AuYAXw+M9+NiI2A64A9gUXAyMx8cW1ikCSps6hmLvf+EfGjiPh1REyqf1RR91+BAzNzd2AQcGhEDAX+DfhxZv4N8Gfg9OL804E/F+U/Ls6TJElVqGaU+63Az6nMDrey2oqL36ovKXa7FY8EDgQ+W5SPBy4ArgKOLrahshjMf0ZE+Jt3SZJaVk1CfyczL1+TyiOiK5Vu9b8BrqDyW/Y3M3NFcco8Kqu3UTy/DJCZKyLiLSrd8m+sUudoYDTADjvssCZhSZJUOtUk9Msi4nzgLird6ABk5syWXpiZ7wGDImIL4BZgpzUNtFGd44BxAHV1daW5eh/zmzFVnXf1UVe3fJIkqdOpJqHvBnyeSld5fZd7fdd5VTLzzYiYAuwDbBERGxRX6X2B+cVp84HtgXnFim49qAyOkyRJLagmoY8APtZ4CdVqRERvYHmRzDcGDqIy0G0K8GkqI91HAbcVL5lU7P+/4vjvvX8uSVJ1qknoTwJbAAtaWfc2wPjiPnoXYEJm3h4Rc4AbI+JC4DEqA+4onv9vRMwF/gR8ppXvJ0lSp1VNQt8CeDoiHuWD99A/1dyLMnM2MLiJ8ueBvZoof4dKb4AkSWqlahL6+TWPQpIkrZVq1kO/ry0CkSRJa66a9dDfpjKqHWBDKhPELM3Mj9QyMEmSVL1qrtA3r9+OiKAyo9vQWgYlSZJap5r10Btkxa3AITWKR5IkrYFqutyPa7TbBagD3qlZRJIkqdWqGeXeeF30FcCLVLrdJUlSB1HNPfS1WhddkiTV3moTekR8t5nXZWZ+rwbxSJKkNdDcFfrSJso2BU6nsqypCV2SpA5itQk9M/+jfjsiNgfOBE6lsqjKf6zudZIkqe01ew89IrYEzgZOAsYDe2Tmn9siMEmSVL3m7qFfDBwHjAN2y8wlbRaVJElqleYmlvk6sC3wbeCViFhcPN6OiMVtE54kSapGc/fQWzWLnCRJaj8mbUmSSsCELklSCZjQJUkqARO6JEklYEKXJKkETOiSJJWACV2SpBIwoUuSVAImdEmSSsCELklSCZjQJUkqgZol9IjYPiKmRMSciPhDRJxZlG8ZEXdHxLPFc8+iPCLi8oiYGxGzI2KPWsUmSVLZ1PIKfQXw9czcGRgKnBEROwNjgXszcwBwb7EPcBgwoHiMBq6qYWySJJVKzRJ6Zr6amTOL7beBp4DtgKOB8cVp44Fjiu2jgeuy4iFgi4jYplbxSZJUJm1yDz0i+gGDgYeBPpn5anHoNaBPsb0d8HKjl80ryiRJUgtqntAjYjPgZuCszFzc+FhmJpCtrG90REyPiOkLFy5ch5FKkrT+qmlCj4huVJL59Zn566L49fqu9OJ5QVE+H9i+0cv7FmUfkJnjMrMuM+t69+5du+AlSVqP1HKUewA/B57KzB81OjQJGFVsjwJua1R+cjHafSjwVqOueUmS1IwNalj3MODzwBMRMaso+xZwETAhIk4HXgJOKI5NBg4H5gLLgFNrGJskSaVSs4SemQ8AsZrDw5s4P4EzahWPJEll5kxxkiSVgAldkqQSMKFLklQCtRwUpxoY85sxVZ139VFX1zgSSVJH4hW6JEklYEKXJKkETOiSJJWACV2SpBIwoUuSVAImdEmSSsCELklSCZjQJUkqARO6JEklYEKXJKkETOiSJJWACV2SpBIwoUuSVAImdEmSSsCELklSCZjQJUkqARO6JEklYEKXJKkETOiSJJWACV2SpBIwoUuSVAImdEmSSsCELklSCdQsoUfENRGxICKebFS2ZUTcHRHPFs89i/KIiMsjYm5EzI6IPWoVlyRJZVTLK/RrgUNXKRsL3JuZA4B7i32Aw4ABxWM0cFUN45IkqXRqltAzcxrwp1WKjwbGF9vjgWMalV+XF
Q8BW0TENrWKTZKksmnre+h9MvPVYvs1oE+xvR3wcqPz5hVlkiSpCu02KC4zE8jWvi4iRkfE9IiYvnDhwhpEJknS+qetE/rr9V3pxfOConw+sH2j8/oWZR+SmeMysy4z63r37l3TYCVJWl+0dUKfBIwqtkcBtzUqP7kY7T4UeKtR17wkSWrBBrWqOCJ+CewPbBUR84DzgYuACRFxOvAScEJx+mTgcGAusAw4tVZxSZJURjVL6Jl54moODW/i3ATOqFUskiSVnTPFSZJUAiZ0SZJKoGZd7qq9k66YtvqDt4+Bq69uu2AkSe3KhF5mY8as/pjJXpJKxS53SZJKwIQuSVIJmNAlSSoBE7okSSVgQpckqQQc5d5ZNTcCHhwFL0nrGa/QJUkqARO6JEklYJe7mmaXvCStV0zoJTXtpWamhW1kvx33q3EkkqS2YJe7JEklYEKXJKkE7HLvwJpdTa2j8x68JLUpE3o76ggJe43vtbeUsCVJbcoud0mSSsCELklSCZjQJUkqARO6JEkl4KA4dTyOkJekVjOht6C5kejXn+Esa2vMUfKStE6Z0NdCSz87M+FLktqKCb2GOsLvzNeVan+vDh1gfni77CV1QiZ0qTX8siCpg+pQCT0iDgUuA7oCP8vMi9o5JHVEq0mqVc96t4b1S1JH1mESekR0Ba4ADgLmAY9GxKTMnNO+kam1OvzSrSZsSSXUYRI6sBcwNzOfB4iIG4GjARN6SbXmvvx6o9ZfFuzSl7QaHSmhbwe83Gh/HrB3O8WiEluvv0gc+sn2jgBYTe9Kc182qhh7MOY31X0ZuvqoGnypWZsvYi19yarluIu1/QK5jr4gtuvfrp10xM8cmdlmb9aciPg0cGhmfqHY/zywd2b+n1XOGw2MLnY/ATyzlm+9FfDGWtahD7Nda8e2rQ3btTZs13Vrx8zs3dSBjnSFPh/YvtF+36LsAzJzHDBuXb1pREzPzLp1VZ8qbNfasW1rw3atDdu17XSkudwfBQZERP+I2BD4DDCpnWOSJGm90GGu0DNzRUT8H+B3VH62dk1m/qGdw5Ikab3QYRI6QGZOBia38duus+57fYDtWju2bW3YrrVhu7aRDjMoTpIkrbmOdA9dkiStoU6d0CPi0Ih4JiLmRsTY9ngDg9AAAAc/SURBVI6no4uIayJiQUQ82ahsy4i4OyKeLZ57FuUREZcXbTs7IvZo9JpRxfnPRsSo9vgsHUlEbB8RUyJiTkT8ISLOLMpt27UQEd0j4pGIeLxo138pyvtHxMNF+91UDMIlIjYq9ucWx/s1quu8ovyZiDikfT5RxxIRXSPisYi4vdi3XdtbZnbKB5WBd88BHwM2BB4Hdm7vuDryg8o06HsATzYq+3dgbLE9Fvi3Yvtw4A4ggKHAw0X5lsDzxXPPYrtne3+2dm7XbYA9iu3NgT8CO9u2a92uAWxWbHcDHi7aawLwmaL8p8CXiu0vAz8ttj8D3FRs71z8f9gI6F/83+ja3p+vvR/A2cANwO3Fvu3azo/OfIXeMNVsZr4L1E81q9XIzGnAn1YpPhoYX2yPB45pVH5dVjwEbBER2wCHAHdn5p8y88/A3cChtY++48rMVzNzZrH9NvAUlZkTbdu1ULTPkmK3W/FI4EBgYlG+arvWt/dEYHhERFF+Y2b+NTNfAOZS+f/RaUVEX+AI4GfFfmC7trvOnNCbmmp2u3aKZX3WJzNfLbZfA/oU26trX9u9GUV35GAqV5O27VoquoVnAQuofMF5DngzM1cUpzRuo4b2K46/BfTCdm3KpcA3gZXFfi9s13bXmRO61rGs9KP5s4k1FBGbATcDZ2Xm4sbHbNs1k5nvZeYgKjNP7gXs1M4hrfci4khgQWbOaO9Y9EGdOaFXNdWsWvR60d1L8bygKF9d+9ruTYiIblSS+fWZ+eui2LZdRzLzTWAKsA+VWxT1c3A0bqOG9iuO9wAWYbuuahjwqYh4kcqtygOBy7Bd211nTuhONbtuTALqR1OPA
m5rVH5yMSJ7KPBW0X38O+DgiOhZjNo+uCjrtIr7iT8HnsrMHzU6ZNuuhYjoHRFbFNsbAwdRGZ8wBfh0cdqq7Vrf3p8Gfl/0jEwCPlOM1u4PDAAeaZtP0fFk5nmZ2Tcz+1H5v/n7zDwJ27X9tfeovPZ8UBkt/Ecq99X+ub3j6egP4JfAq8ByKve7TqdyL+xe4FngHmDL4twArija9gmgrlE9p1EZADMXOLW9P1d7P4C/p9KdPhuYVTwOt23Xul0HAo8V7fok8N2i/GNUEsdc4FfARkV592J/bnH8Y43q+ueivZ8BDmvvz9ZRHsD+vD/K3XZt54czxUmSVAKductdkqTSMKFLklQCJnRJkkrAhC5JUgmY0CVJKgETutSBRMSSls9aq/rPiohN1sX7Fb8fviciZkXEyFWOXRsRLxTHHo+I4Wv4HnURcfmaxih1Jhu0fIqkEjkL+AWwbB3UNRggK1OrNuWczJwYEQcA46hMHNIqmTkdmL7mIUqdh1foUgcXER+PiDsjYkZE3B8ROxXl1xbrov9PRDwfEZ8uyrtExJUR8XRU1lGfHBGfjoivAtsCUyJiSqP6v19cRT8UEX2aeP8tI+LWqKy9/lBEDIyIral8MRhSXIV/vJmP8P8oFt0oFku5OCIeLeobU5TfGBFHNHrPa4uY92+03vamEXFNVNY4fywiji7KfxsRA4vtxyLiu8X2v0bEF9e85aX1iwld6vjGAV/JzD2BbwBXNjq2DZWZ5o4ELirKjgP6UVlv+vNU5i8nMy8HXgEOyMwDinM3BR7KzN2BaUBTCfBfgMcycyDwLSpLty4AvgDcn5mDMvO5ZuI/FLi12D6dylS1Q4AhwBeLaT9vAk4AKKZiHg78dpV6/pnKtKF7AQcAF0fEpsD9wL4R0QNYQWWucYB9i88kdQp2uUsdWLEC298Bv6pM+Q7ARo1OuTUzVwJzGl1d/z3wq6L8tcZX4014F7i92J5BZb7zVf09cDxAZv4+InpFxEeqCP/iiPgBlUU39inKDgYG1vcmUFmoYwBwB3BZRGxE5QvAtMz8S6PPXP/aT0XEN4r97sAOVBL6V4EXqHwJOKgYJ9A/M5+pIk6pFEzoUsfWhco606u7T/3XRtuxmnOaszzfn//5Pdbt/4T6e+hfAa4B9qQS41cy80OLxkTEVOAQYCSVVbw+dApw/KpJuriirwOep7Lm+VZUehpc3lOdil3uUgeWlXXRX4iIEVBZmS0idm/hZQ8Cxxf30vtQWUCj3tvA5q0M437gpOL99wfeyFXWa2/BfwJdIuIQKqu/fSkqy8USEX9bdJtDpdv9VCpd5Xc2Uc/vgK8Uq9MREfWD8t4FXgZGULlffz+VWxN2t6tTMaFLHcsmETGv0eNsKsn09Ih4HPgDcHQLddxMZTW8OVQGrs0E3iqOjQPubKEbflUXAHtGxGwq9+lHNX/6BxU9ABcC3wR+VsQ1MyKeBK7m/V6Bu4B/AO4pkvSqvgd0A2ZHxB+K/Xr3Awsy8y/Fdt/iWeo0XG1NKqGI2Cwzl0RELypLVg7LzNfaOy5JteM9dKmcbo+ILYANge+ZzKXy8wpdkqQS8B66JEklYEKXJKkETOiSJJWACV2SpBIwoUuSVAImdEmSSuD/Axeq0BhbdLOUAAAAAElFTkSuQmCC\n"
-          },
-          "metadata": {
-            "needs_background": "light"
-          }
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        ""
-      ],
-      "metadata": {
-        "id": "wtgEtG-E56mJ"
-      }
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "## Trying best RoBERTa model with Bilal data"
-      ],
-      "metadata": {
-        "id": "ZoA7aIoq56tW"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "train_bilal = pd.read_csv(\"/content/drive/My Drive/Colab Notebooks/W266 Project/data/Paper_Data/train.csv\", encoding='latin-1')\n",
-        "test_bilal = pd.read_csv(\"/content/drive/My Drive/Colab Notebooks/W266 Project/data/Paper_Data/test.csv\", encoding='latin-1')\n",
-        "x_train_bilal = list(train_bilal[['sentence']].sentence)\n",
-        "y_train_bilal = np.asarray(train_bilal[['label']].label)\n",
-        "\n",
-        "x_test_bilal = list(test_bilal[['sentence']].sentence)\n",
-        "y_test_bilal = np.asarray(test_bilal[['label']].label)\n",
-        "\n",
-        "x_train_bilal, x_valid_bilal, y_train_bilal, y_valid_bilal = train_test_split(x_train_bilal, y_train_bilal, train_size=0.9)"
-      ],
-      "metadata": {
-        "id": "uUiOIUsv5_eE"
-      },
-      "execution_count": 10,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "max_lengths = [384]\n",
-        "for length in max_lengths:\n",
-        "  run_roberta(length, roberta_tokenizer, str.format('roberta_model_bilal_{length}', length = length), \n",
-        "                                                    x_train = x_train_bilal,\n",
-        "                                                    y_train = y_train_bilal,\n",
-        "                                                    x_valid = x_valid_bilal,\n",
-        "                                                    y_valid = y_valid_bilal,\n",
-        "                                                    x_test = x_test_bilal,\n",
-        "                                                    y_test = y_test_bilal)"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "KpAMuqDM6Nts",
-        "outputId": "7a525a79-fc00-4bc9-9416-09557f37ae95"
-      },
-      "execution_count": 13,
-      "outputs": [
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "This model will be saved as roberta_model_bilal_384\n",
-            "Running roBERTa for encoding max_length: 384\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (7200, 384)\n",
-            "Created encoding for validation data with shape (800, 384)\n",
-            "Created encoding for test data with shape (2000, 384)\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/kernel:0', 
'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification_3/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "225/225 [==============================] - 141s 605ms/step - loss: 0.6754 - accuracy: 0.6044 - val_loss: 0.6657 - val_accuracy: 0.6787\n",
-            "Epoch 2/4\n",
-            "225/225 [==============================] - 134s 595ms/step - loss: 0.6565 - accuracy: 0.6597 - val_loss: 0.6535 - val_accuracy: 0.6812\n",
-            "Epoch 3/4\n",
-            "225/225 [==============================] - 134s 595ms/step - loss: 0.6486 - accuracy: 0.6704 - val_loss: 0.6430 - val_accuracy: 0.6875\n",
-            "Epoch 4/4\n",
-            "225/225 [==============================] - 134s 595ms/step - loss: 0.6399 - accuracy: 0.6765 - val_loss: 0.6372 - val_accuracy: 0.6913\n",
-            "Evaluating model...\n",
-            "63/63 [==============================] - 40s 572ms/step - loss: 0.6384 - accuracy: 0.6950\n",
-            "Test loss: 0.6383771896362305\n",
-            "Test accuracy: 0.6949999928474426\n",
-            "63/63 [==============================] - 38s 556ms/step\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.71      0.66      0.68      1000\n",
-            "           1       0.68      0.73      0.71      1000\n",
-            "\n",
-            "    accuracy                           0.69      2000\n",
-            "   macro avg       0.70      0.70      0.69      2000\n",
-            "weighted avg       0.70      0.69      0.69      2000\n",
-            "\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_bilal_384/assets\n"
-          ]
-        },
-        {
-          "metadata": {
-            "tags": null
-          },
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_bilal_384/assets\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "### Trying Bilal data on RoBERTa model with linear decay learning rate"
-      ],
-      "metadata": {
-        "id": "i2E5mljpMyr8"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Try it on our previous best max_length of 384\n",
-        "max_lengths = [384]\n",
-        "# Best initial learning rate from Yelp data set\n",
-        "learning_rates = [3e-5]\n",
-        "for initial_learning_rate in learning_rates:\n",
-        "  warmup_schedule = create_learning_schedule(initial_learning_rate)\n",
-        "  for length in max_lengths:\n",
-        "    model_name = str.format('roberta_model_bilal_{length}_lr-{learning_rate}', length=length, learning_rate=initial_learning_rate)\n",
-        "    run_roberta(length, roberta_tokenizer, model_name, \n",
-        "                                                    x_train = x_train_bilal,\n",
-        "                                                    y_train = y_train_bilal,\n",
-        "                                                    x_valid = x_valid_bilal,\n",
-        "                                                    y_valid = y_valid_bilal,\n",
-        "                                                    x_test = x_test_bilal,\n",
-        "                                                    y_test = y_test_bilal,\n",
-        "                                                    optimizer = tf.keras.optimizers.experimental.Adam(\n",
-        "      learning_rate = warmup_schedule))"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 924,
-          "referenced_widgets": [
-            "d3f7e1595b6b40fc950881a93e4e31f9",
-            "b22c33ac58544011811433e23887560b",
-            "c3051c828ebf4df0a1c7e9283c873dfc",
-            "56d650affeeb40778e4237fa5438d949",
-            "587005e79b4d47bf9bf51ee4278c25da",
-            "73f693401f7b4e69a63d8c0415696974",
-            "3a3043af8ce5447d859181fda60ce44d",
-            "4c79d9ea74454f0386411f533b3c3613",
-            "56a3e8fb03684984bdd822da6932a22b",
-            "c101251a71b94ec29e02ed5a3deafde9",
-            "85e4fb471f0d47588aed5d059938ce3b"
-          ]
-        },
-        "id": "IIJervtBMxzG",
-        "outputId": "b8859806-a706-4dc7-b9e6-ba0b1279d7f9"
-      },
-      "execution_count": 12,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "This model will be saved as roberta_model_bilal_384_lr-3e-05\n",
-            "Running roBERTa for encoding max_length: 384\n",
-            "Tokenizing data...\n",
-            "Created encoding for training data with shape (7200, 384)\n",
-            "Created encoding for validation data with shape (800, 384)\n",
-            "Created encoding for test data with shape (2000, 384)\n",
-            "Using custom optimizer\n"
-          ]
-        },
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "Downloading tf_model.h5:   0%|          | 0.00/627M [00:00<?, ?B/s]"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "d3f7e1595b6b40fc950881a93e4e31f9"
-            }
-          },
-          "metadata": {}
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
-            "\n",
-            "Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Training model...\n",
-            "Epoch 1/4\n",
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Gradients do not exist for variables ['tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._0/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/key/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._1/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._2/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/intermediate/dense/bias:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._3/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._4/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._5/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._6/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/gamma:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._7/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._8/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._9/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._10/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/query/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/key/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/kernel:0', 
'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/self/value/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/attention/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/intermediate/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/kernel:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/dense/bias:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/encoder/layer_._11/output/LayerNorm/beta:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/word_embeddings/weight:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/token_type_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/position_embeddings/embeddings:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/gamma:0', 'tf_roberta_for_sequence_classification/roberta/embeddings/LayerNorm/beta:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "225/225 [==============================] - 142s 594ms/step - loss: 0.6954 - accuracy: 0.5053 - val_loss: 0.6892 - val_accuracy: 0.6850\n",
-            "Epoch 2/4\n",
-            "225/225 [==============================] - 132s 586ms/step - loss: 0.6824 - accuracy: 0.5921 - val_loss: 0.6782 - val_accuracy: 0.6950\n",
-            "Epoch 3/4\n",
-            "225/225 [==============================] - 132s 586ms/step - loss: 0.6689 - accuracy: 0.6486 - val_loss: 0.6680 - val_accuracy: 0.6712\n",
-            "Epoch 4/4\n",
-            "225/225 [==============================] - 132s 586ms/step - loss: 0.6594 - accuracy: 0.6646 - val_loss: 0.6579 - val_accuracy: 0.6825\n",
-            "Evaluating model...\n",
-            "63/63 [==============================] - 34s 497ms/step - loss: 0.6598 - accuracy: 0.6705\n",
-            "Test loss: 0.6597793698310852\n",
-            "Test accuracy: 0.6704999804496765\n",
-            "63/63 [==============================] - 34s 492ms/step\n",
-            "\n",
-            " Classification Report:\n",
-            "\n",
-            "              precision    recall  f1-score   support\n",
-            "\n",
-            "           0       0.64      0.77      0.70      1000\n",
-            "           1       0.71      0.57      0.63      1000\n",
-            "\n",
-            "    accuracy                           0.67      2000\n",
-            "   macro avg       0.68      0.67      0.67      2000\n",
-            "weighted avg       0.68      0.67      0.67      2000\n",
-            "\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:absl:Found untraced functions such as encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, dense_layer_call_fn while saving (showing 5 of 422). These functions will not be directly callable after loading.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_bilal_384_lr-3e-05/assets\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_bilal_384_lr-3e-05/assets\n"
-          ]
-        }
-      ]
-    }
-  ],
-  "metadata": {
-    "accelerator": "GPU",
-    "colab": {
-      "background_execution": "on",
-      "collapsed_sections": [],
-      "machine_shape": "hm",
-      "name": "RoBERTa.ipynb",
-      "provenance": []
-    },
-    "gpuClass": "standard",
-    "kernelspec": {
-      "display_name": "Python 3",
-      "name": "python3"
-    },
-    "language_info": {
-      "name": "python"
-    },
-    "widgets": {
-      "application/vnd.jupyter.widget-state+json": {
-        "ff91ad40e0cb44bcae67dd2eb9ff7dbe": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_69a8b638873d484694629efb8b988e93",
-              "IPY_MODEL_e92cd265403c4417b787742f8fe2756a",
-              "IPY_MODEL_e1c4ac149c324ceeae62d0528b1dd025"
-            ],
-            "layout": "IPY_MODEL_a78d6c7b85ba44e78160f5d054ad1288"
-          }
-        },
-        "69a8b638873d484694629efb8b988e93": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_738de4b5332942749435e1539cd90098",
-            "placeholder": "​",
-            "style": "IPY_MODEL_c174dbb3fbfd426687040e83e9f12421",
-            "value": "Downloading: 100%"
-          }
-        },
-        "e92cd265403c4417b787742f8fe2756a": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_b21b352052da4de8af45fcfabd237302",
-            "max": 657434796,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_8ca96f2c389f45d58d8b60bb34765bb3",
-            "value": 657434796
-          }
-        },
-        "e1c4ac149c324ceeae62d0528b1dd025": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_447b0a4d2c474f7e9777a23e72fc8d92",
-            "placeholder": "​",
-            "style": "IPY_MODEL_2900ac0595cc4a7fa79b4d95374c94f2",
-            "value": " 627M/627M [00:09&lt;00:00, 66.9MB/s]"
-          }
-        },
-        "a78d6c7b85ba44e78160f5d054ad1288": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "738de4b5332942749435e1539cd90098": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "c174dbb3fbfd426687040e83e9f12421": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "b21b352052da4de8af45fcfabd237302": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "8ca96f2c389f45d58d8b60bb34765bb3": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "447b0a4d2c474f7e9777a23e72fc8d92": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "2900ac0595cc4a7fa79b4d95374c94f2": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "03d829ad4c0a419d9f75ed02c10b243e": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_3f87aa1685a647bb91445fd4716b8104",
-              "IPY_MODEL_2ce390869afa4ae6ba4389319eddb44b",
-              "IPY_MODEL_a82c381fcbc743a1a110cb7f15426373"
-            ],
-            "layout": "IPY_MODEL_0c547845eac94a57be770f3e5a3be938"
-          }
-        },
-        "3f87aa1685a647bb91445fd4716b8104": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_930eb246751e49b78a358407c0f41c71",
-            "placeholder": "​",
-            "style": "IPY_MODEL_22b4ab47cfda43cda17b58583771ba9b",
-            "value": "Downloading vocab.json: 100%"
-          }
-        },
-        "2ce390869afa4ae6ba4389319eddb44b": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_e033dcd9a49a485c8cbb28475703975b",
-            "max": 898823,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_895da1e0a44e434e876d1f43046c7fe2",
-            "value": 898823
-          }
-        },
-        "a82c381fcbc743a1a110cb7f15426373": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_b1f1a7fb3bc24ca0b10d3702c3bba1d2",
-            "placeholder": "​",
-            "style": "IPY_MODEL_14e048cd8ed041a99ed0019bdd900d19",
-            "value": " 878k/878k [00:00&lt;00:00, 1.64MB/s]"
-          }
-        },
-        "0c547845eac94a57be770f3e5a3be938": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "930eb246751e49b78a358407c0f41c71": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "22b4ab47cfda43cda17b58583771ba9b": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "e033dcd9a49a485c8cbb28475703975b": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "895da1e0a44e434e876d1f43046c7fe2": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "b1f1a7fb3bc24ca0b10d3702c3bba1d2": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "14e048cd8ed041a99ed0019bdd900d19": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "d706b56424224b47897ac2ee0c0b2442": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_6b732f60756b46ecbeb96c737f4496b4",
-              "IPY_MODEL_d21e7d26b7c94843903096fae3008538",
-              "IPY_MODEL_88e4813027b148a9997d46ca01b01089"
-            ],
-            "layout": "IPY_MODEL_3ea2594afd164aae81d90acc80a451f1"
-          }
-        },
-        "6b732f60756b46ecbeb96c737f4496b4": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_a9a95b664df94256a8977e656716219a",
-            "placeholder": "​",
-            "style": "IPY_MODEL_0e0e076722cf467c8ba2c48ed7a421a5",
-            "value": "Downloading merges.txt: 100%"
-          }
-        },
-        "d21e7d26b7c94843903096fae3008538": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_d121d804a688426fa389a75348c9f827",
-            "max": 456318,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_e4400fc6e334470b883895b61c3d6037",
-            "value": 456318
-          }
-        },
-        "88e4813027b148a9997d46ca01b01089": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_493b1d7528604a9799db026ff8fe4feb",
-            "placeholder": "​",
-            "style": "IPY_MODEL_a1d6e88d6b7c4041afc2d4db02040233",
-            "value": " 446k/446k [00:00&lt;00:00, 1.64MB/s]"
-          }
-        },
-        "3ea2594afd164aae81d90acc80a451f1": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "a9a95b664df94256a8977e656716219a": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "0e0e076722cf467c8ba2c48ed7a421a5": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "d121d804a688426fa389a75348c9f827": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "e4400fc6e334470b883895b61c3d6037": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "493b1d7528604a9799db026ff8fe4feb": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "a1d6e88d6b7c4041afc2d4db02040233": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "7330000b9a704aebaeed3f67742208e6": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_09d57f7ca0e04c94bd803a075f7f91d9",
-              "IPY_MODEL_314cf1ae89ab4afeb11f1de84a2564eb",
-              "IPY_MODEL_ea1cbc99ccb0453db30f7fd735a9f2e5"
-            ],
-            "layout": "IPY_MODEL_5716bfe94d8547c99e45709090059659"
-          }
-        },
-        "09d57f7ca0e04c94bd803a075f7f91d9": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_548bd11142854b12b1537c9726690c1c",
-            "placeholder": "​",
-            "style": "IPY_MODEL_92060595691e42d382a281fb4097311e",
-            "value": "Downloading config.json: 100%"
-          }
-        },
-        "314cf1ae89ab4afeb11f1de84a2564eb": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_9eefdd930bf94d22bc4ba038d3aa86ff",
-            "max": 481,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_8bef2c5a8ee0450e95af02c3e6435902",
-            "value": 481
-          }
-        },
-        "ea1cbc99ccb0453db30f7fd735a9f2e5": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_00665f4b6a174be5b3be13831c4f274b",
-            "placeholder": "​",
-            "style": "IPY_MODEL_a7ffcd3cc547410bbbeadfde21545bf4",
-            "value": " 481/481 [00:00&lt;00:00, 20.0kB/s]"
-          }
-        },
-        "5716bfe94d8547c99e45709090059659": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "548bd11142854b12b1537c9726690c1c": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "92060595691e42d382a281fb4097311e": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "9eefdd930bf94d22bc4ba038d3aa86ff": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "8bef2c5a8ee0450e95af02c3e6435902": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "00665f4b6a174be5b3be13831c4f274b": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "a7ffcd3cc547410bbbeadfde21545bf4": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "8cade04bb95e44ed904539d741e64efe": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_73c292b4f50744e891eaae1d8d11a02c",
-              "IPY_MODEL_fb5b3017503245f48f89d2bc4fe5fe0a",
-              "IPY_MODEL_985a9b84851540f68cd8775b1bcf36c1"
-            ],
-            "layout": "IPY_MODEL_623b933656844b3c83ab1c4b1d130b16"
-          }
-        },
-        "73c292b4f50744e891eaae1d8d11a02c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_4a2b38ab27584e3a97c1ccd66942e900",
-            "placeholder": "​",
-            "style": "IPY_MODEL_9e9f4becd9fe47638fc7883e83bf928d",
-            "value": "Downloading: 100%"
-          }
-        },
-        "fb5b3017503245f48f89d2bc4fe5fe0a": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_1433fa72a097434180d2cb1927bf051a",
-            "max": 657434796,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_b4a5e8c6a241479c9049c4b25a270f09",
-            "value": 657434796
-          }
-        },
-        "985a9b84851540f68cd8775b1bcf36c1": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_0995efa63b404fd49898c5318cf6379d",
-            "placeholder": "​",
-            "style": "IPY_MODEL_efd46accb7a348b0b619aef9d944d53f",
-            "value": " 627M/627M [00:10&lt;00:00, 49.8MB/s]"
-          }
-        },
-        "623b933656844b3c83ab1c4b1d130b16": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "4a2b38ab27584e3a97c1ccd66942e900": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "9e9f4becd9fe47638fc7883e83bf928d": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "1433fa72a097434180d2cb1927bf051a": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "b4a5e8c6a241479c9049c4b25a270f09": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "0995efa63b404fd49898c5318cf6379d": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "efd46accb7a348b0b619aef9d944d53f": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "4f5effcf41474013a945079f28ba31c2": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_15b4e427bd5d4050ab0bdec9dbc6d019",
-              "IPY_MODEL_01a250296169438f9e72dc13119b7238",
-              "IPY_MODEL_76309ca6ee104cbdaab8d997e79c190e"
-            ],
-            "layout": "IPY_MODEL_e424adf2dcc74ada98b2d6f9c29b4cee"
-          }
-        },
-        "15b4e427bd5d4050ab0bdec9dbc6d019": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_41731d8403ad4661a4766d4aab610f3b",
-            "placeholder": "​",
-            "style": "IPY_MODEL_fa15a217dfd349928fd2c8446121a1a4",
-            "value": "Downloading: 100%"
-          }
-        },
-        "01a250296169438f9e72dc13119b7238": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_bb60c6ad128940faa2839bb74dbc6ab5",
-            "max": 657434796,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_90048e3a214346e88b63a02d0cafd97e",
-            "value": 657434796
-          }
-        },
-        "76309ca6ee104cbdaab8d997e79c190e": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_79fa68293d544160a988b133dbdcc798",
-            "placeholder": "​",
-            "style": "IPY_MODEL_05712e9c7e3943cb9e13690c6443d134",
-            "value": " 627M/627M [00:15&lt;00:00, 49.4MB/s]"
-          }
-        },
-        "e424adf2dcc74ada98b2d6f9c29b4cee": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "41731d8403ad4661a4766d4aab610f3b": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "fa15a217dfd349928fd2c8446121a1a4": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "bb60c6ad128940faa2839bb74dbc6ab5": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "90048e3a214346e88b63a02d0cafd97e": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "79fa68293d544160a988b133dbdcc798": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "05712e9c7e3943cb9e13690c6443d134": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "d3f7e1595b6b40fc950881a93e4e31f9": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_b22c33ac58544011811433e23887560b",
-              "IPY_MODEL_c3051c828ebf4df0a1c7e9283c873dfc",
-              "IPY_MODEL_56d650affeeb40778e4237fa5438d949"
-            ],
-            "layout": "IPY_MODEL_587005e79b4d47bf9bf51ee4278c25da"
-          }
-        },
-        "b22c33ac58544011811433e23887560b": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_73f693401f7b4e69a63d8c0415696974",
-            "placeholder": "​",
-            "style": "IPY_MODEL_3a3043af8ce5447d859181fda60ce44d",
-            "value": "Downloading tf_model.h5: 100%"
-          }
-        },
-        "c3051c828ebf4df0a1c7e9283c873dfc": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_4c79d9ea74454f0386411f533b3c3613",
-            "max": 657434796,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_56a3e8fb03684984bdd822da6932a22b",
-            "value": 657434796
-          }
-        },
-        "56d650affeeb40778e4237fa5438d949": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_c101251a71b94ec29e02ed5a3deafde9",
-            "placeholder": "​",
-            "style": "IPY_MODEL_85e4fb471f0d47588aed5d059938ce3b",
-            "value": " 627M/627M [00:09&lt;00:00, 67.3MB/s]"
-          }
-        },
-        "587005e79b4d47bf9bf51ee4278c25da": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "73f693401f7b4e69a63d8c0415696974": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "3a3043af8ce5447d859181fda60ce44d": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "4c79d9ea74454f0386411f533b3c3613": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "56a3e8fb03684984bdd822da6932a22b": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "c101251a71b94ec29e02ed5a3deafde9": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "85e4fb471f0d47588aed5d059938ce3b": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        }
-      }
-    }
-  },
-  "nbformat": 4,
-  "nbformat_minor": 0
-}
\ No newline at end of file