Merge branch 'beta' of https://github.com/e2b-dev/E2B into beta

e2b-dev · Oct 14, 2024 · c824c3d · c824c3d
2 parents fb919b1 + b14cb5e
commit c824c3d
Show file tree

Hide file tree

Showing 6 changed files with 354 additions and 20 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/apps/web/src/app/(docs)/docs/code-interpreting/analyze-data-with-ai/page.mdx b/apps/web/src/app/(docs)/docs/code-interpreting/analyze-data-with-ai/page.mdx
@@ -0,0 +1,345 @@
+import Image from 'next/image'
+import imgChart from '@/images/analyze-data-chart.png'
+
+# Analyze data with AI
+
+You can use E2B Sandbox to run AI-generated code to analyze data. Here's how the AI data analysis workflow usually looks like:
+1. Your user has a dataset in CSV format or other formats.
+2. You prompt the LLM to generate code (usually Python) based on the user's data.
+3. The sandbox runs the AU-generated code and returns the results.
+4. You display the results to the user.
+
+---
+
+## Example: Analyze CSV file with E2B and Claude 3.5 Sonnet
+This short example will show you how to use E2B Sandbox to run AI-generated code to analyze CSV data.
+
+### Table of Contents
+1. [Install dependencies](#1-install-dependencies)
+2. [Set your API keys](#2-set-your-api-keys)
+3. [Download example CSV file](#3-download-example-csv-file)
+4. [Initialize the sandbox and upload the dataset to the sandbox](#4-initialize-the-sandbox-and-upload-the-dataset-to-the-sandbox)
+5. [Prepare the method for running AI-generated code](#5-prepare-the-method-for-running-ai-generated-code)
+6. [Prepare the prompt and initialize Anthropic client](#6-prepare-the-prompt-and-initialize-anthropic-client)
+7. [Connect the sandbox to the LLM with tool calling](#7-connect-the-sandbox-to-the-llm-with-tool-calling)
+8. [Parse the response and run the AI-generated code in the sandbox](#8-parse-the-response-and-run-the-ai-generated-code-in-the-sandbox)
+9. [Save the generated chart](#9-save-the-generated-chart)
+10. [Run the full code](#10-run-the-full-code)
+
+### 1. Install dependencies
+Install the E2B SDK and Claude SDK to your project by running the following command in your terminal.
+
+<CodeGroup isTerminalCommand>
+```bash {{ language: 'js' }}
+npm i @e2b/code-interpreter @anthropic-ai/sdk dotenv
+```
+```bash {{ language: 'python' }}
+pip install e2b_code_interpreter anthropic python-dotenv
+```
+</CodeGroup>
+
+### 2. Set your API keys
+1. Get your E2B API key from [E2B Dashboard](/dashboard?tab=keys).
+2. Get your Claude API key from [Claude API Dashboard](https://console.anthropic.com/settings/keys).
+3. Paste the keys into your `.env` file.
+
+<CodeGroup title=".env">
+```bash
+E2B_API_KEY=e2b_***
+ANTHROPIC_API_KEY=sk-ant-***
+```
+</CodeGroup>
+
+### 3. Download example CSV file
+{/* We'll be using the publicly available [AirBnB NYC dataset](https://www.kaggle.com/datasets/dgomonov/new-york-city-airbnb-open-data). */}
+
+We'll be using the publicly available [dataset of about 10,000 movies](https://www.kaggle.com/datasets/muqarrishzaib/tmdb-10000-movies-dataset).
+1. Click the "Download" button at the top of the page.
+2. Select "Download as zip (2 MB)".
+3. Unzip the file and you should see `dataset.csv` file. Move it to the root of your project.
+
+
+### 4. Initialize the sandbox and upload the dataset to the sandbox
+We'll upload the dataset from the third step to the sandbox and save it as `dataset.csv` file.
+
+<CodeGroup>
+```js {{ language: 'js', description: 'index.ts' }}
+import 'dotenv/config'
+import fs from 'fs'
+import { Sandbox } from '@e2b/code-interpreter'
+
+// Create sandbox
+const sbx = await Sandbox.create()
+
+// Upload the dataset to the sandbox
+const content = fs.readFileSync('dataset.csv')
+const datasetPathInSandbox = await sbx.files.write('dataset.csv', content)
+```
+```python {{ language: 'python', description: 'main.py' }}
+from dotenv import load_dotenv
+load_dotenv()
+from e2b_code_interpreter import Sandbox
+
+# Create sandbox
+sbx = Sandbox()
+
+# Upload the dataset to the sandbox
+dataset_path_in_sandbox = ""
+with open("dataset.csv", "rb") as f:
+    dataset_path_in_sandbox = sbx.files.write("dataset.csv", f)
+
+```
+</CodeGroup>
+
+### 5. Prepare the method for running AI-generated code
+Add the following code to the file. Here we're adding the method for code execution.
+
+<CodeGroup>
+```js {{ language: 'js', description: 'index.ts' }}
+// ... code from the previous step
+
+async function runAIGeneratedCode(aiGeneratedCode: string) {
+  console.log('Running the code in the sandbox....')
+  const execution = await sbx.runCode(aiGeneratedCode)
+  console.log('Code execution finished!')
+  console.log(execution)
+}
+```
+```python {{ language: 'python', description: 'main.py' }}
+# ... code from the previous step
+
+def run_ai_generated_code(ai_generated_code: str):
+    print('Running the code in the sandbox....')
+    execution = sbx.run_code(ai_generated_code)
+    print('Code execution finished!')
+    print(execution)
+```
+</CodeGroup>
+
+### 6. Prepare the prompt and initialize Anthropic client
+The prompt we'll be using describes the dataset and the analysis we want to perform like this:
+    1. Describe the columns in the CSV dataset.
+    2. Ask the LLM what we want to analyze - here we want to analyze the vote average over time. We're asking for a chart as the output.
+    3. Instruct the LLM to generate Python code for the data analysis.
+
+<CodeGroup>
+```js {{ language: 'js', description: 'index.ts' }}
+import Anthropic from '@anthropic-ai/sdk'
+
+const prompt = `
+I have a CSV file about movies. It has about 10k rows. It's saved in the sandbox at ${datasetPathInSandbox.path}.
+These are the columns:
+- 'id': number, id of the movie
+- 'original_language': string like "eng", "es", "ko", etc
+- 'original_title': string that's name of the movie in the original language
+- 'overview': string about the movie
+- 'popularity': float, from 0 to 9137.939. It's not normalized at all and there are outliers
+- 'release_date': date in the format yyyy-mm-dd
+- 'title': string that's the name of the movie in english
+- 'vote_average': float number between 0 and 10 that's representing viewers voting average
+- 'vote_count': int for how many viewers voted
+
+I want to better understand how the vote average has changed over the years. Write Python code that analyzes the dataset based on my request and produces right chart accordingly`
+
+const anthropic = new Anthropic()
+const msg = await anthropic.messages.create({
+  model: 'claude-3-5-sonnet-20240620',
+  max_tokens: 1024,
+  messages: [{ role: 'user', content: prompt }],
+})
+```
+
+```python {{ language: 'python', description: 'main.py' }}
+from dotenv import load_dotenv
+load_dotenv()
+from anthropic import Anthropic
+
+prompt = '''
+I have a CSV file about movies. It has about 10k rows. It's saved in the sandbox at {datasetPathInSandbox.path}.
+These are the columns:
+- 'id': number, id of the movie
+- 'original_language': string like "eng", "es", "ko", etc
+- 'original_title': string that's name of the movie in the original language
+- 'overview': string about the movie
+- 'popularity': float, from 0 to 9137.939. It's not normalized at all and there are outliers
+- 'release_date': date in the format yyyy-mm-dd
+- 'title': string that's the name of the movie in english
+- 'vote_average': float number between 0 and 10 that's representing viewers voting average
+- 'vote_count': int for how many viewers voted
+
+I want to better understand how the vote average has changed over the years. Write Python code that analyzes the dataset based on my request and produces right chart accordingly'''
+
+anthropic = Anthropic()
+msg = anthropic.messages.create(
+  model='claude-3-5-sonnet-20240620',
+  max_tokens=1024,
+  messages=[
+    {"role": "user", "content": prompt}
+  ]
+)
+```
+</CodeGroup>
+
+### 7. Connect the sandbox to the LLM with tool calling
+We'll use Claude's ability to [use tools (function calling)](https://docs.anthropic.com/en/docs/build-with-claude/tool-use) to run the code in the sandbox.
+
+The way we'll do it is by connecting the method for running AI-generated code we created in the previous step to the Claude model.
+
+Update the initialization of the Anthropic client to include the tool use like this:
+<CodeGroup>
+```js {{ language: 'js', description: 'index.ts' }}
+const msg = await anthropic.messages.create({
+  model: 'claude-3-5-sonnet-20240620',
+  max_tokens: 1024,
+  messages: [{ role: 'user', content: prompt }],
+  tools: [ // $HighlightLine
+     { // $HighlightLine
+      name: 'run_python_code', // $HighlightLine
+      description: 'Run Python code', // $HighlightLine
+      input_schema: { // $HighlightLine
+        type: 'object', // $HighlightLine
+        properties: { // $HighlightLine
+          code: { // $HighlightLine
+            type: 'string', // $HighlightLine
+            description: 'The Python code to run', // $HighlightLine
+          }, // $HighlightLine
+        }, // $HighlightLine
+        required: ['code'], // $HighlightLine
+      }, // $HighlightLine
+    }, // $HighlightLine
+  ], // $HighlightLine
+})
+```
+```python {{ language: 'python', description: 'main.py' }}
+msg = anthropic.messages.create(
+  model='claude-3-5-sonnet-20240620',
+  max_tokens=1024,
+  messages=[
+    {"role": "user", "content": prompt}
+  ],
+  tools=[ # $HighlightLine
+    { # $HighlightLine
+      "name": "run_python_code", # $HighlightLine
+      "description": "Run Python code", # $HighlightLine
+      "input_schema": { # $HighlightLine
+        "type": "object", # $HighlightLine
+        "properties": { # $HighlightLine
+          "code": { "type": "string", "description": "The Python code to run" }, # $HighlightLine
+        }, # $HighlightLine
+        "required": ["code"], # $HighlightLine
+      }, # $HighlightLine
+    }, # $HighlightLine
+  ], # $HighlightLine
+)
+```
+</CodeGroup>
+
+### 8. Parse the LLM's response and run the AI-generated code in the sandbox
+Now we'll parse the `msg` object to get the code from the LLM's response based on the tool we created in the previous step.
+Once we have the code, we'll pass it to the `runAIGeneratedCode` method in JavaScript or `run_ai_generated_code` method in Python we created in the previous step to run the code in the sandbox.
+
+<CodeGroup>
+```js {{ language: 'js', description: 'index.ts' }}
+// ... code from the previous steps
+
+interface CodeRunToolInput {
+  code: string
+}
+
+for (const contentBlock of msg.content) {
+  if (contentBlock.type === 'tool_use') {
+    if (contentBlock.name === 'run_python_code') {
+      const code = (contentBlock.input as CodeRunToolInput).code
+      console.log('Will run following code in the sandbox', code)
+      // Execute the code in the sandbox
+      await runAIGeneratedCode(code)
+    }
+  }
+}
+```
+```python {{ language: 'python', description: 'main.py' }}
+for content_block in msg.content:
+    if content_block.type == 'tool_use':
+        if content_block.name == 'run_python_code':
+            code = content_block.input['code']
+            print('Will run following code in the sandbox', code)
+            # Execute the code in the sandbox
+            run_ai_generated_code(code)
+```
+</CodeGroup>
+
+### 9. Save the generated chart
+When running code in the sandbox for data analysis, you can get different types of results.
+Including stdout, stderr, charts, tables, text, runtime errors, and more.
+
+In this example we're specifically asking for a chart so we'll be looking for the chart in the results.
+
+Let's update the `runAIGeneratedCode` method in JavaScript and `run_ai_generated_code` method in Python to check for the chart in the results and save it to the file.
+<CodeGroup>
+```js {{ language: 'js', description: 'index.ts' }}
+async function runAIGeneratedCode(aiGeneratedCode: string) {
+  console.log('Running the code in the sandbox....')
+  const execution = await sbx.runCode(aiGeneratedCode)
+  console.log('Code execution finished!')
+
+  // First let's check if the code ran successfully.
+  if (execution.error) { // $HighlightLine
+    console.error('AI-generated code had an error. Try prompting the LLM differently.', execution.error) // $HighlightLine
+    process.exit(1) // $HighlightLine
+  } // $HighlightLine
+
+  // Iterate over all the results and specifically check for png files that will represent the chart.
+  let resultIdx = 0 // $HighlightLine
+  for (const result of execution.results) { // $HighlightLine
+    if (result.png) { // $HighlightLine
+      // Save the png to a file
+      // The png is in base64 format.
+      fs.writeFileSync(`chart-${resultIdx}.png`, result.png, { encoding: 'base64' }) // $HighlightLine
+      resultIdx++ // $HighlightLine
+    } // $HighlightLine
+  } // $HighlightLine
+}
+```
+```python {{ language: 'python', description: 'main.py' }}
+def run_ai_generated_code(ai_generated_code: str):
+    print('Running the code in the sandbox....')
+    execution = sbx.run_code(ai_generated_code)
+    print('Code execution finished!')
+
+    # First let's check if the code ran successfully.
+    if execution.error: # $HighlightLine
+        print('AI-generated code had an error. Try prompting the LLM differently.', execution.error) # $HighlightLine
+        sys.exit(1) # $HighlightLine
+
+    # Iterate over all the results and specifically check for png files that will represent the chart.
+    result_idx = 0 # $HighlightLine
+    for result in execution.results: # $HighlightLine
+        if result.png: # $HighlightLine
+            # Save the png to a file
+            # The png is in base64 format.
+            with open(f'chart-{result_idx}.png', 'wb') as f: # $HighlightLine
+                f.write(base64.b64decode(result.png)) # $HighlightLine
+            result_idx += 1 # $HighlightLine
+```
+</CodeGroup>
+
+### 10. Run the full code
+Now you can run the whole code to see the results.
+
+<CodeGroup isTerminalCommand>
+```bash {{ language: 'js' }}
+npx tsx index.ts
+```
+```bash {{ language: 'python' }}
+python main.py
+```
+</CodeGroup>
+
+You should see the chart in the root of your project that will look similar to this:
+
+<Image
+  src={imgChart}
+  className="rounded w-full"
+  alt="Chart visualizing voting average of our dataset over time"
+  unoptimized
+/>
diff --git a/...lyze-data/preinstalled-libraries/page.mdx → ...a-with-ai/preinstalled-libraries/page.mdx b/...lyze-data/preinstalled-libraries/page.mdx → ...a-with-ai/preinstalled-libraries/page.mdx
diff --git a/apps/web/src/app/(docs)/docs/code-interpreting/analyze-data/page.mdx b/apps/web/src/app/(docs)/docs/code-interpreting/analyze-data/page.mdx
diff --git a/apps/web/src/components/Navigation/routes.tsx b/apps/web/src/components/Navigation/routes.tsx
@@ -146,22 +146,22 @@ export const routes: NavGroup[] = [
     title: '* Code Interpreting',
     items: [
       {
-        title: '* Analyze data with AI',
+        title: 'Analyze data with AI',
         links: [
           {
             title: 'Overview',
-            href: '/docs/code-interpreting/analyze-data',
+            href: '/docs/code-interpreting/analyze-data-with-ai',
           },
           {
             title: 'Pre-installed libraries',
-            href: '/docs/code-interpreting/analyze-data/pre-installed-libraries',
-          },
-          {
-            title: '* Example',
-            href: '/docs/code-interpreting/analyze-data/example',
+            href: '/docs/code-interpreting/analyze-data-with-ai/pre-installed-libraries',
           },
         ],
       },
+      {
+        title: '* Connect your data',
+        href: '/docs/code-interpreting/connect-your-data',
+      },
       {
         title: 'Supported languages',
         links: [
@@ -188,12 +188,8 @@ export const routes: NavGroup[] = [
         ]
       },
       {
-        title: '* Create charts & visualizations',
-        href: '/docs/code-interpreting/create-charts',
-      },
-      {
-        title: '* Connect your data',
-        href: '/docs/code-interpreting/connect-your-data',
+        title: '* Code interpreting results',
+        href: '/docs/code-interpreting/todo',
       },
     ]
   },

diff --git a/apps/web/src/images/analyze-data-chart.png b/apps/web/src/images/analyze-data-chart.png