Merge branch main into feature/Couchbase-vector-store

FlowiseAI · May 6, 2024 · 47f4ec7 · 47f4ec7
2 parents 95d09ef + f9195b6
commit 47f4ec7
Show file tree

Hide file tree

Showing 15 changed files with 581 additions and 37 deletions.
diff --git a/packages/components/credentials/TogetherAIApi.credential.ts b/packages/components/credentials/TogetherAIApi.credential.ts
@@ -0,0 +1,23 @@
+import { INodeParams, INodeCredential } from '../src/Interface'
+
+/**
+ * Credential definition exposing the single secret field `togetherAIApiKey`.
+ */
+class TogetherAIApi implements INodeCredential {
+    label: string
+    name: string
+    version: number
+    inputs: INodeParams[]
+
+    constructor() {
+        // The only input: a masked field that stores the API key.
+        const apiKeyField: INodeParams = { label: 'TogetherAI Api Key', name: 'togetherAIApiKey', type: 'password' }
+
+        this.label = 'TogetherAI API'
+        this.name = 'togetherAIApi'
+        this.version = 1.0
+        this.inputs = [apiKeyField]
+    }
+}
+
+module.exports = { credClass: TogetherAIApi }
diff --git a/packages/components/nodes/chatmodels/ChatTogetherAI/ChatTogetherAI.ts b/packages/components/nodes/chatmodels/ChatTogetherAI/ChatTogetherAI.ts
@@ -0,0 +1,80 @@
+import { BaseCache } from '@langchain/core/caches'
+import { ChatTogetherAI } from '@langchain/community/chat_models/togetherai'
+import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
+import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
+
+/**
+ * Chat-model node that builds a LangChain `ChatTogetherAI` instance from the node inputs
+ * and the linked `togetherAIApi` credential.
+ */
+class ChatTogetherAI_ChatModels implements INode {
+    label: string
+    name: string
+    version: number
+    type: string
+    icon: string
+    category: string
+    description: string
+    baseClasses: string[]
+    inputs: INodeParams[]
+    credential: INodeParams
+
+    constructor() {
+        this.label = 'ChatTogetherAI'
+        this.name = 'chatTogetherAI'
+        this.version = 1.0
+        this.type = 'ChatTogetherAI'
+        this.icon = 'togetherai.png'
+        this.category = 'Chat Models'
+        this.description = 'Wrapper around TogetherAI large language models'
+        this.baseClasses = [this.type, ...getBaseClasses(ChatTogetherAI)]
+        this.credential = {
+            label: 'Connect Credential',
+            name: 'credential',
+            type: 'credential',
+            credentialNames: ['togetherAIApi']
+        }
+        this.inputs = [
+            { label: 'Cache', name: 'cache', type: 'BaseCache', optional: true },
+            {
+                label: 'Model Name',
+                name: 'modelName',
+                type: 'string',
+                placeholder: 'mistralai/Mixtral-8x7B-Instruct-v0.1',
+                description: 'Refer to <a target="_blank" href="https://docs.together.ai/docs/inference-models">models</a> page'
+            },
+            { label: 'Temperature', name: 'temperature', type: 'number', step: 0.1, default: 0.9, optional: true }
+        ]
+    }
+
+    /**
+     * Builds the chat model from the node configuration.
+     * @param nodeData - node configuration; reads `inputs.modelName`, `inputs.cache`, `inputs.temperature`, `inputs.streaming`
+     * @param _ - unused
+     * @param options - forwarded to `getCredentialData` to resolve the linked credential
+     * @returns a `ChatTogetherAI` instance; streaming is enabled unless `inputs.streaming` is set
+     */
+    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
+        const modelName = nodeData.inputs?.modelName as string
+        const cache = nodeData.inputs?.cache as BaseCache
+        const temperature = nodeData.inputs?.temperature as string
+        const streaming = nodeData.inputs?.streaming as boolean
+
+        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
+        const togetherAIApiKey = getCredentialParam('togetherAIApiKey', credentialData, nodeData)
+
+        const obj: any = {
+            model: modelName,
+            togetherAIApiKey: togetherAIApiKey,
+            streaming: streaming ?? true
+        }
+        // Temperature is optional: a missing or blank value parses to NaN, so only forward real numbers.
+        const parsedTemperature = parseFloat(temperature)
+        if (!Number.isNaN(parsedTemperature)) obj.temperature = parsedTemperature
+        if (cache) obj.cache = cache
+
+        return new ChatTogetherAI(obj)
+    }
+}
+
+module.exports = { nodeClass: ChatTogetherAI_ChatModels }
diff --git a/packages/components/nodes/chatmodels/ChatTogetherAI/togetherai.png b/packages/components/nodes/chatmodels/ChatTogetherAI/togetherai.png
diff --git a/packages/components/nodes/documentloaders/Unstructured/Unstructured.ts b/packages/components/nodes/documentloaders/Unstructured/Unstructured.ts
@@ -0,0 +1,176 @@
+import {
+    HiResModelName,
+    SkipInferTableTypes,
+    UnstructuredLoaderOptions,
+    UnstructuredLoaderStrategy
+} from 'langchain/document_loaders/fs/unstructured'
+import { BaseDocumentLoader } from 'langchain/document_loaders/base'
+import { StringWithAutocomplete } from 'langchain/dist/util/types'
+import { Document } from '@langchain/core/documents'
+
+/**
+ * Value for `chunking_strategy`, used to chunk text into larger or smaller elements. Defaults to None, with by_title as the optional alternative.
+ */
+type ChunkingStrategy = 'None' | 'by_title'
+
+/**
+ * A single element returned by the Unstructured API: its element type,
+ * its text content, and the metadata attached to it.
+ */
+type Element = {
+    type: string
+    text: string
+    // Purposefully loosely typed: the metadata keys are not constrained here.
+    metadata: {
+        [key: string]: unknown
+    }
+}
+
+/**
+ * Document loader that posts an in-memory file buffer to an Unstructured partition
+ * endpoint and converts the returned elements into LangChain documents.
+ * Path-based loading is not supported here: call `loadAndSplitBuffer()` rather than `load()`.
+ */
+export class UnstructuredLoader extends BaseDocumentLoader {
+    /** Never assigned by this class; callers pass the file name to each call instead. */
+    public filePath: string
+
+    private apiUrl = 'https://api.unstructured.io/general/v0/general'
+    private apiKey?: string
+    private strategy: StringWithAutocomplete<UnstructuredLoaderStrategy> = 'hi_res'
+    private encoding?: string
+    private ocrLanguages: Array<string> = []
+    private coordinates?: boolean
+    private pdfInferTableStructure?: boolean
+    private xmlKeepTags?: boolean
+    private skipInferTableTypes?: Array<StringWithAutocomplete<SkipInferTableTypes>>
+    private hiResModelName?: StringWithAutocomplete<HiResModelName>
+    private includePageBreaks?: boolean
+    private chunkingStrategy?: StringWithAutocomplete<ChunkingStrategy>
+    private multiPageSections?: boolean
+    private combineUnderNChars?: number
+    private newAfterNChars?: number
+    private maxCharacters?: number
+
+    /**
+     * @param optionsOrLegacyFilePath - loader options; `apiUrl`, `strategy` and `ocrLanguages`
+     *   keep their field defaults when omitted, every other option is copied as given.
+     */
+    constructor(optionsOrLegacyFilePath: UnstructuredLoaderOptions) {
+        super()
+        const options = optionsOrLegacyFilePath
+        this.apiKey = options.apiKey
+        this.apiUrl = options.apiUrl ?? this.apiUrl
+        this.strategy = options.strategy ?? this.strategy
+        this.encoding = options.encoding
+        this.ocrLanguages = options.ocrLanguages ?? this.ocrLanguages
+        this.coordinates = options.coordinates
+        this.pdfInferTableStructure = options.pdfInferTableStructure
+        this.xmlKeepTags = options.xmlKeepTags
+        this.skipInferTableTypes = options.skipInferTableTypes
+        this.hiResModelName = options.hiResModelName
+        this.includePageBreaks = options.includePageBreaks
+        this.chunkingStrategy = options.chunkingStrategy
+        this.multiPageSections = options.multiPageSections
+        this.combineUnderNChars = options.combineUnderNChars
+        this.newAfterNChars = options.newAfterNChars
+        this.maxCharacters = options.maxCharacters
+    }
+
+    /**
+     * Sends one file to the partition endpoint as multipart form data.
+     * @param buffer - raw file contents
+     * @param fileName - name attached to the uploaded part and reported in error messages
+     * @returns the response elements whose `text` is a string
+     * @throws Error when the response status is not OK or the body is not a JSON array
+     */
+    async _partition(buffer: Buffer, fileName: string): Promise<Element[]> {
+        const formData = new FormData()
+        formData.append('files', new Blob([buffer]), fileName)
+        formData.append('strategy', this.strategy)
+        this.ocrLanguages.forEach((language) => {
+            formData.append('ocr_languages', language)
+        })
+        if (this.encoding) {
+            formData.append('encoding', this.encoding)
+        }
+        if (this.coordinates === true) {
+            formData.append('coordinates', 'true')
+        }
+        if (this.pdfInferTableStructure === true) {
+            formData.append('pdf_infer_table_structure', 'true')
+        }
+        if (this.xmlKeepTags === true) {
+            formData.append('xml_keep_tags', 'true')
+        }
+        if (this.skipInferTableTypes) {
+            formData.append('skip_infer_table_types', JSON.stringify(this.skipInferTableTypes))
+        }
+        if (this.hiResModelName) {
+            formData.append('hi_res_model_name', this.hiResModelName)
+        }
+        if (this.includePageBreaks) {
+            formData.append('include_page_breaks', 'true')
+        }
+        if (this.chunkingStrategy) {
+            formData.append('chunking_strategy', this.chunkingStrategy)
+        }
+        if (this.multiPageSections !== undefined) {
+            formData.append('multipage_sections', this.multiPageSections ? 'true' : 'false')
+        }
+        if (this.combineUnderNChars !== undefined) {
+            formData.append('combine_under_n_chars', String(this.combineUnderNChars))
+        }
+        if (this.newAfterNChars !== undefined) {
+            formData.append('new_after_n_chars', String(this.newAfterNChars))
+        }
+        if (this.maxCharacters !== undefined) {
+            formData.append('max_characters', String(this.maxCharacters))
+        }
+
+        const headers = {
+            'UNSTRUCTURED-API-KEY': this.apiKey ?? ''
+        }
+
+        const response = await fetch(this.apiUrl, {
+            method: 'POST',
+            body: formData,
+            headers
+        })
+
+        if (!response.ok) {
+            // Report the name of the file being sent: `this.filePath` is never set on this loader.
+            throw new Error(`Failed to partition file ${fileName} with error ${response.status} and message ${await response.text()}`)
+        }
+
+        const elements = await response.json()
+        if (!Array.isArray(elements)) {
+            throw new Error(`Expected partitioning request to return an array, but got ${JSON.stringify(elements)}`)
+        }
+        return elements.filter((el) => typeof el?.text === 'string') as Element[]
+    }
+
+    /**
+     * Partitions the buffer and wraps every returned element in a `Document`.
+     * @param buffer - raw file contents
+     * @param fileName - name of the file the buffer came from
+     * @returns one document per element; the element `type` is stored as `metadata.category`
+     */
+    async loadAndSplitBuffer(buffer: Buffer, fileName: string): Promise<Document[]> {
+        const elements = await this._partition(buffer, fileName)
+
+        // `_partition` has already dropped elements without string text, so no re-check is needed.
+        return elements.map(
+            (element) =>
+                new Document({
+                    pageContent: element.text,
+                    metadata: { ...element.metadata, category: element.type }
+                })
+        )
+    }
+
+    /** Always rejects: buffers must be supplied through `loadAndSplitBuffer()`. */
+    async load(): Promise<Document[]> {
+        throw new Error('load() is not supported for UnstructuredLoader. Use loadAndSplitBuffer() instead.')
+    }
+}
diff --git a/packages/components/nodes/documentloaders/Unstructured/UnstructuredFile.ts b/packages/components/nodes/documentloaders/Unstructured/UnstructuredFile.ts
@@ -1,12 +1,14 @@
 import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
 import {
-    UnstructuredLoader,
     UnstructuredLoaderOptions,
     UnstructuredLoaderStrategy,
     SkipInferTableTypes,
-    HiResModelName
+    HiResModelName,
+    UnstructuredLoader as LCUnstructuredLoader
 } from 'langchain/document_loaders/fs/unstructured'
 import { getCredentialData, getCredentialParam } from '../../../src/utils'
+import { getFileFromStorage } from '../../../src'
+import { UnstructuredLoader } from './Unstructured'
 
 class UnstructuredFile_DocumentLoaders implements INode {
     label: string
@@ -23,7 +25,7 @@ class UnstructuredFile_DocumentLoaders implements INode {
     constructor() {
         this.label = 'Unstructured File Loader'
         this.name = 'unstructuredFileLoader'
-        this.version = 2.0
+        this.version = 3.0
         this.type = 'Document'
         this.icon = 'unstructured-file.svg'
         this.category = 'Document Loaders'
@@ -41,7 +43,18 @@ class UnstructuredFile_DocumentLoaders implements INode {
                 label: 'File Path',
                 name: 'filePath',
                 type: 'string',
-                placeholder: ''
+                placeholder: '',
+                optional: true,
+                warning:
+                    'Use the File Upload instead of File path. If file is uploaded, this path is ignored. Path will be deprecated in future releases.'
+            },
+            {
+                label: 'Files Upload',
+                name: 'fileObject',
+                type: 'file',
+                description: 'Files to be processed. Multiple files can be uploaded.',
+                fileType:
+                    '.txt, .text, .pdf, .docx, .doc, .jpg, .jpeg, .eml, .html, .htm, .md, .pptx, .ppt, .msg, .rtf, .xlsx, .xls, .odt, .epub'
             },
             {
                 label: 'Unstructured API URL',
@@ -416,6 +429,7 @@ class UnstructuredFile_DocumentLoaders implements INode {
         const combineUnderNChars = nodeData.inputs?.combineUnderNChars as number
         const newAfterNChars = nodeData.inputs?.newAfterNChars as number
         const maxCharacters = nodeData.inputs?.maxCharacters as number
+        const fileBase64 = nodeData.inputs?.fileObject as string
 
         const obj: UnstructuredLoaderOptions = {
             apiUrl: unstructuredAPIUrl,
@@ -438,8 +452,48 @@ class UnstructuredFile_DocumentLoaders implements INode {
         const unstructuredAPIKey = getCredentialParam('unstructuredAPIKey', credentialData, nodeData)
         if (unstructuredAPIKey) obj.apiKey = unstructuredAPIKey
 
-        const loader = new UnstructuredLoader(filePath, obj)
-        let docs = await loader.load()
+        let docs: any[] = []
+        let files: string[] = []
+
+        if (fileBase64) {
+            const loader = new UnstructuredLoader(obj)
+            //FILE-STORAGE::["CONTRIBUTING.md","LICENSE.md","README.md"]
+            if (fileBase64.startsWith('FILE-STORAGE::')) {
+                const fileName = fileBase64.replace('FILE-STORAGE::', '')
+                if (fileName.startsWith('[') && fileName.endsWith(']')) {
+                    files = JSON.parse(fileName)
+                } else {
+                    files = [fileName]
+                }
+                const chatflowid = options.chatflowid
+
+                for (const file of files) {
+                    const fileData = await getFileFromStorage(file, chatflowid)
+                    const loaderDocs = await loader.loadAndSplitBuffer(fileData, file)
+                    docs.push(...loaderDocs)
+                }
+            } else {
+                if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) {
+                    files = JSON.parse(fileBase64)
+                } else {
+                    files = [fileBase64]
+                }
+
+                for (const file of files) {
+                    const splitDataURI = file.split(',')
+                    const filename = splitDataURI.pop()?.split(':')[1] ?? ''
+                    const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
+                    const loaderDocs = await loader.loadAndSplitBuffer(bf, filename)
+                    docs.push(...loaderDocs)
+                }
+            }
+        } else if (filePath) {
+            const loader = new LCUnstructuredLoader(filePath, obj)
+            const loaderDocs = await loader.load()
+            docs.push(...loaderDocs)
+        } else {
+            throw new Error('File path or File upload is required')
+        }
 
         if (metadata) {
             const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)