-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
63 lines (50 loc) · 1.53 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
package main
import (
"context"
"flag"
"log"
"os"
"github.com/apache/beam/sdks/v2/go/pkg/beam"
"github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/filter"
"github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx"
)
// main assembles and runs the question-processing Beam pipeline: it reads
// questions from the file named by the -input flag, validates them, passes
// them through the LLMClient transform, and hands the results to the
// FirestoreWriter transform.
//
// The Firestore target is configured through the PROJECT_ID, COLLECTION and
// CREDENTIALS_PATH environment variables; their values are passed to the
// writer unmodified (an unset variable yields an empty string).
func main() {
	// Flag for the input file. Flags are defined and parsed before beam.Init()
	// because the Beam SDK expects Init to be called after flag processing
	// (runner and worker options are delivered as command-line flags).
	input := flag.String("input", "input.txt", "Input file to process")
	flag.Parse()

	// Init Apache Beam
	beam.Init()

	// Get env vars
	projectID := os.Getenv("PROJECT_ID")
	collection := os.Getenv("COLLECTION")
	credentialsPath := os.Getenv("CREDENTIALS_PATH")

	// Create the pipeline
	p := beam.NewPipeline()
	s := p.Root()

	// Read questions from the input file into a PCollection; the validation
	// step below consumes its elements as *Question.
	questions := readQuestions(s, *input)

	// Validate questions to match schema (text, type, author)
	// NOTE(review): a non-nil error returned from a DoFn is reported to the
	// runner as a processing failure, so an invalid question presumably aborts
	// the job instead of reaching the nil filter below — confirm this is the
	// intended behavior. Anonymous DoFns may also not be usable on every
	// runner; verify against the target runner.
	validQuestions := beam.ParDo(s, func(q *Question) (*Question, error) {
		if err := validateQuestion(q); err != nil {
			return nil, err
		}
		return q, nil
	}, questions)

	// Filter out nil questions
	validQuestions = filter.Exclude(s, validQuestions, isNilQuestion)

	// Contextual Data Augmentation With Claude (From free-response to Multiple-Choice Questions)
	llmClient := &LLMClient{}
	mQuestions := beam.ParDo(s, llmClient, validQuestions)

	// Initialize the firestore writer
	firestoreWriter := &FirestoreWriter{
		ProjectID:  projectID,
		Collection: collection,
		CredPath:   credentialsPath,
	}

	// Write to Firestore (ParDo0: the writer produces no output PCollection)
	beam.ParDo0(s, firestoreWriter, mQuestions)

	// Run the pipeline
	if err := beamx.Run(context.Background(), p); err != nil {
		log.Fatalf("Failed to execute job: %v", err)
	}
}