Updating image and code cleanup

clinical-data-mining · Nov 25, 2024 · f977bb6 · f977bb6
1 parent 314b6aa
commit f977bb6
Show file tree

Hide file tree

Showing 2 changed files with 63 additions and 57 deletions.
diff --git a/docs/reference/images/compute_cluster.png b/docs/reference/images/compute_cluster.png
diff --git a/examples/databricks-llama-test.ipynb b/examples/databricks-llama-test.ipynb
@@ -15,7 +15,7 @@
     "- The `SERVING_MODEL` must be a serving endpoint in Databricks. This process is done in the [Databricks UI](https://msk-mode-test.cloud.databricks.com/ml/endpoints/)\n",
     "- When testing is completed, MAKE SURE THE `CDSI ML Cluster` IS NO LONGER RUNNING. Unexpected costs will result even if the cluster is idle!\n",
     "\n",
-    "!['Compute cluster'](../docs/reference/images/compute_cluster.png)\n",
+    "!['Compute cluster'](https://github.com/clinical-data-mining/msk_cdm/blob/main/docs/reference/images/compute_cluster.png?raw=true)\n",
     "    "
    ],
    "id": "5f0f96e52ae54ff5"
@@ -25,118 +25,124 @@
    "id": "1660925dae63fc66",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2024-11-25T23:39:55.269786Z",
-     "start_time": "2024-11-25T23:39:55.266304Z"
+     "end_time": "2024-11-25T23:45:26.226733Z",
+     "start_time": "2024-11-25T23:45:25.229119Z"
     }
    },
    "source": "from openai import OpenAI\n",
    "outputs": [],
-   "execution_count": 6
+   "execution_count": 1
   },
   {
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2024-11-25T23:39:55.361127Z",
-     "start_time": "2024-11-25T23:39:55.358009Z"
+     "end_time": "2024-11-25T23:55:25.020101Z",
+     "start_time": "2024-11-25T23:55:25.016721Z"
     }
    },
    "cell_type": "code",
    "source": [
     "DATABRICKS_TOKEN = '<YOUR-TOKEN-HERE>'\n",
-    "SERVING_MODEL = \"meta_llama_3_8b_instruct_cdm\"\n",
-    "MAX_TOKENS = 256\n"
+    "SERVING_MODEL = 'meta_llama_3_8b_instruct_cdm'\n",
+    "MAX_TOKENS = 256\n",
+    "ENDPOINT_URL = 'https://msk-mode-test.cloud.databricks.com/serving-endpoints'\n"
    ],
    "id": "f841f23f1f9b3923",
    "outputs": [],
    "execution_count": 7
   },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-11-25T23:56:27.314456Z",
+     "start_time": "2024-11-25T23:56:27.310935Z"
+    }
+   },
+   "cell_type": "code",
+   "source": "USER_PROMPT = \"What are the sites of disease based on this piece of text: Since CT scan of DATE, Predominantly upper lobe and superior segment lower lobe ground glass nodules are not seen on this chest radiograph\"",
+   "id": "a9da7959a6f2bb8",
+   "outputs": [],
+   "execution_count": 12
+  },
   {
    "metadata": {
     "collapsed": true,
     "ExecuteTime": {
-     "end_time": "2024-11-25T23:39:55.479054Z",
-     "start_time": "2024-11-25T23:39:55.465727Z"
+     "end_time": "2024-11-25T23:55:27.207501Z",
+     "start_time": "2024-11-25T23:55:27.194660Z"
     }
    },
    "cell_type": "code",
    "source": [
     "# Create client with Databricks\n",
     "client = OpenAI(\n",
     "    api_key=DATABRICKS_TOKEN,\n",
-    "    base_url=\"https://msk-mode-test.cloud.databricks.com/serving-endpoints\"\n",
-    ")\n",
-    "\n"
+    "    base_url=ENDPOINT_URL\n",
+    ")\n"
    ],
    "id": "initial_id",
    "outputs": [],
-   "execution_count": 8
+   "execution_count": 9
   },
   {
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2024-11-25T23:39:55.623657Z",
-     "start_time": "2024-11-25T23:39:55.620263Z"
+     "end_time": "2024-11-25T23:55:28.459488Z",
+     "start_time": "2024-11-25T23:55:28.455569Z"
     }
    },
    "cell_type": "code",
-   "source": " USER_PROMPT = \"What are the sites of disease based on this piece of text: IMPRESSION: Since CT scan of DATE, Predominantly upper lobe and superior segment lower lobe ground glass nodules are not seen on this chest radiograph. No consolidation or effusion.\"",
-   "id": "feab31d6aad17116",
+   "source": [
+    "def llm_prompt(\n",
+    "        prompt: str,\n",
+    "        serving_model=SERVING_MODEL,\n",
+    "        max_tokens=MAX_TOKENS\n",
+    "):\n",
+    "    chat_completion = client.chat.completions.create(\n",
+    "        messages=[\n",
+    "            {\n",
+    "                \"role\": \"system\",\n",
+    "                \"content\": \"You are an AI assistant\"\n",
+    "            },\n",
+    "            {\n",
+    "                \"role\": \"user\",\n",
+    "                \"content\": prompt\n",
+    "            }\n",
+    "        ],\n",
+    "        model=serving_model,\n",
+    "        max_tokens=max_tokens\n",
+    "    )\n",
+    "    \n",
+    "    return chat_completion.choices[0].message.content \n",
+    "    "
+   ],
+   "id": "e1c8106b1bfbb9fd",
    "outputs": [],
-   "execution_count": 9
+   "execution_count": 10
   },
   {
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2024-11-25T23:39:57.198005Z",
-     "start_time": "2024-11-25T23:39:55.714907Z"
+     "end_time": "2024-11-25T23:56:32.184426Z",
+     "start_time": "2024-11-25T23:56:31.299260Z"
     }
    },
    "cell_type": "code",
-   "source": [
-    "chat_completion = client.chat.completions.create(\n",
-    "    messages=[\n",
-    "        {\n",
-    "            \"role\": \"system\",\n",
-    "            \"content\": \"You are an AI assistant\"\n",
-    "        },\n",
-    "        {\n",
-    "            \"role\": \"user\",\n",
-    "            \"content\": USER_PROMPT\n",
-    "        }\n",
-    "    ],\n",
-    "    model=SERVING_MODEL,\n",
-    "    max_tokens=MAX_TOKENS\n",
-    ")\n",
-    "\n",
-    "print(chat_completion.choices[0].message.content)"
-   ],
-   "id": "e1c8106b1bfbb9fd",
+   "source": "print(llm_prompt(prompt=USER_PROMPT))",
+   "id": "8e6985ffc3d5f21f",
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "According to the text, the site of disease is the upper lobe and superior segment of the lower lobe of the lungs, specifically:\n",
+      "The sites of disease mentioned in the text are:\n",
       "\n",
-      "* Predominantly upper lobe\n",
-      "* Superior segment lower lobe (indicating a specific region within the lower lobe)\n"
+      "* Upper lobe\n",
+      "* Superior segment of the lower lobe\n"
      ]
     }
    ],
-   "execution_count": 10
-  },
-  {
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2024-11-25T23:39:57.404001Z",
-     "start_time": "2024-11-25T23:39:57.397328Z"
-    }
-   },
-   "cell_type": "code",
-   "source": "",
-   "id": "8e6985ffc3d5f21f",
-   "outputs": [],
-   "execution_count": null
+   "execution_count": 13
   }
  ],
  "metadata": {