How to get large json I want from my context and question #6987
Replies: 1 comment
-
I found a similar discussion that might be helpful: Using langchain to get response from large json data [1]. To extract all data from a PDF and generate a large JSON object, you can use the approach shown below.
Here's a simplified example:
import { PDFLoader } from "langchain/document_loaders/fs/pdf";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { OpenAIEmbeddings } from "@langchain/openai";
import { PromptTemplate } from "@langchain/core/prompts";
/**
 * Load a PDF through LangChain's `PDFLoader`.
 *
 * @param {string} filePath - Path passed straight to the `PDFLoader` constructor.
 * @returns {Promise<*>} Resolves to whatever `PDFLoader#load()` produces
 *   (the caller treats it as the list of loaded documents).
 */
async function loadPdf(filePath) {
  const pdfLoader = new PDFLoader(filePath);
  return pdfLoader.load();
}
/**
 * Split one string into chunk documents using
 * `RecursiveCharacterTextSplitter` configured with a chunk size of 1000
 * and an overlap of 200.
 *
 * @param {string} text - The text to split; it is passed to the splitter as a
 *   single-element array.
 * @returns {Promise<*>} Resolves to the result of `createDocuments`.
 */
async function splitText(text) {
  const chunkingOptions = { chunkSize: 1000, chunkOverlap: 200 };
  const textSplitter = new RecursiveCharacterTextSplitter(chunkingOptions);
  const chunkDocs = await textSplitter.createDocuments([text]);
  return chunkDocs;
}
/**
 * Embed the text of every chunk with OpenAI embeddings.
 *
 * The chunk texts are sent through a single `embedDocuments` call instead of
 * one `embedQuery` call per chunk, so the client can batch the work rather
 * than opening one request per chunk.
 *
 * @param {Array<{pageContent: string}>} chunks - Chunk documents whose
 *   `pageContent` is embedded.
 * @returns {Promise<number[][]>} One embedding per chunk, in input order
 *   (assumes `embedDocuments` preserves order — per LangChain's Embeddings API).
 */
async function generateEmbeddings(chunks) {
  // Nothing to embed: avoid constructing a client or touching the network.
  if (chunks.length === 0) {
    return [];
  }

  const embeddings = new OpenAIEmbeddings({ apiKey: process.env.OPENAI_API_KEY });
  const texts = chunks.map((chunk) => chunk.pageContent);
  return embeddings.embedDocuments(texts);
}
/**
 * Render the prompt that asks for the JSON output.
 *
 * Builds a `PromptTemplate` declaring the variables `api_docs` and `question`,
 * then formats it with the supplied context and questions.
 *
 * NOTE(review): the template text is a placeholder. If a real JSON skeleton is
 * pasted in, its literal braces presumably need escaping for the template
 * engine — confirm against the PromptTemplate docs.
 *
 * @param {string} context - Value bound to the `api_docs` variable.
 * @param {string} questions - Value bound to the `question` variable.
 * @returns {Promise<*>} Resolves to the result of `PromptTemplate#format`.
 */
async function formatJson(context, questions) {
  const promptConfig = {
    inputVariables: ["api_docs", "question"],
    template: `Your JSON template here...`,
  };
  const prompt = new PromptTemplate(promptConfig);

  const values = { api_docs: context, question: questions };
  const rendered = await prompt.format(values);
  return rendered;
}
/**
 * End-to-end example: load a PDF, split its text into chunks, rebuild a single
 * context string from those chunks, render the JSON prompt, and log the result.
 *
 * @param {string} filePath - Path of the PDF to process.
 * @returns {Promise<void>} Resolves once the formatted output has been logged.
 */
async function main(filePath) {
  const docs = await loadPdf(filePath);

  // Feed the splitter the extracted text only. Serializing the document
  // objects with JSON.stringify would mix metadata keys, JSON punctuation and
  // escaped newlines into every chunk and, in turn, into the prompt context.
  // Assumes each loaded document exposes its text as `pageContent`, the same
  // field read from the chunks below.
  const pdfText = docs.map((doc) => doc.pageContent).join("\n\n");
  const textChunks = await splitText(pdfText);

  // The whole document is used as context here, so no embeddings are computed:
  // nothing in this flow performs a similarity search that would read them.
  const context = textChunks.map((chunk) => chunk.pageContent).join("\n\n");
  const jsonOutput = await formatJson(context, "Your questions here...");
  console.log(jsonOutput);
}
main("path/to/your/pdf.pdf");
This approach will help you extract and format the data from your PDF into a large JSON object. Adjust the template and questions to fit your needs.
Beta Was this translation helpful? Give feedback.
-
Checked other resources
Commit to Help
Example Code
Description
Above I sent a script that queries based on a question. Now I want to get the answer in JSON format, but my JSON is very large:
{
"companyDescription": "string",
"companyLogo": "string",
"companyLogoIcon": "string",
"companyProspectus": "string",
"customFields": [
{
"name": "string",
"value": "string"
}
],
"founded": "string",
"headquarters": "string",
"industryOrSector": "string",
"tokenPrice": "string",
"investmentType": "string",
"investorNationalityWhitelist": [
"string"
],
"issuerSignee": {
"name": "string",
"surname": "string"
},
"issuerWalletAddress": "string",
"issuerOrganizationId": "string",
"legalName": "string",
"newAccountApplication": "string",
"termsAndConditions": "string",
"newsletterOrUpdate": [
"string"
],
"name": "string",
"description": "string",
"descriptionHeader": "string",
"cusip": "string",
"details": "string",
"orderFundedMessage": "string",
"state": "Draft",
"subscriptionAgreement": "string",
"summaryCompanyDescription": "string",
"summaryImage": "string",
"team": [
{
"name": "string",
"photo": "string",
"title": "string"
}
],
"tokenAddress": "string",
"tokenDecimals": 0,
"tokenTicker": "string",
"tokenNetwork": "Mainnet",
"tokenType": "ERC3643",
"tokenizationAgent": {
"type": "None"
},
"transferAgent": {
"type": "None"
},
"website": "string",
"payments": {
"wireTransfer": {
"bankAccountNumber": "string",
"bankAddress": "string",
"bankName": "string",
"companyAddress": "string",
"companyName": "string",
"routingNumber": "string",
"swiftCode": "string"
},
"cryptoTransfer": {
"contractType": "TrueFi",
"cryptocurrency": "USDC",
"contractAddress": "string",
"contractNetwork": "Mainnet"
},
"achTransfer": true,
"noneTransfer": true
},
"primaryMarket": true,
"secondaryMarket": true,
"rfqEnabled": true,
"endDate": "2024-10-14T11:06:20.251Z",
"position": 0,
"restrictedForDirectInvestors": true,
"assetType": "None",
"privateEquityConfig": {
"fundManager": "string",
"termOfFund": 0,
"issueDate": "2024-10-14T11:06:20.251Z",
"closingDate": "2024-10-14T11:06:20.251Z",
"navLaunchPrice": 0,
"targetIRR": 0,
"managementFees": [
{
"from": 0,
"to": 0,
"percent": 0
}
],
"preferredReturn": 0,
"catchUp": 0,
"lpGpAllocation": 0,
"dividendFrequency": "string",
"fundCurrency": "string",
"targetAUM": 0,
"latestNav": 0,
"investmentSize": 0,
"sector": "string",
"generalPartnerCommitment": 0,
"fundManagerWalletAddress": "string",
"individualMinCommitment": 0,
"individualMaxCommitment": 0,
"institutionMinCommitment": 0,
"institutionMaxCommitment": 0,
"commitmentSchedule": {
"startDate": "2024-10-14T11:06:20.251Z",
"endDate": "2024-10-14T11:06:20.251Z"
},
"capitalCallsSchedule": [
{
"startDate": "2024-10-14T11:06:20.251Z",
"endDate": "2024-10-14T11:06:20.251Z"
}
],
"redemptionsSchedule": [
{
"startDate": "2024-10-14T11:06:20.251Z",
"endDate": "2024-10-14T11:06:20.251Z"
}
],
"delayForCapitalCallToRaise": 0,
"delayForCapitalCallToRaiseUnit": "Hours",
"termOfFundExtension": 0,
"paymentMethod": "StableCoins",
"paymentType": "USDC",
"enableTaApproval": true
},
"roundConfig": {
"endDate": "2024-10-14T11:06:20.251Z",
"investmentMinimum": 0,
"investmentSize": 0,
"tokenPrice": 0,
"latestNav": 0,
"fundTokenNetworks": [
"None"
],
"fundPaymentMethods": [
"wire"
],
"holdingMethod": [
"FundShare"
]
},
"brokerOrganizationId": "string",
"isMultiSig": true,
"isSubscribedForTaApproval": true,
"isDvP": true
}
Currently I am sending a question and, based on that, it retrieves the relevant embeddings. But now I have all of these fields and I want every one of them filled in, so how can I get all of this data from the PDF?
System Info
Windows, Node.js 20
Beta Was this translation helpful? Give feedback.
All reactions