# synthetic_feedback_data.py
# Install necessary libraries
!pip install --quiet google-cloud-aiplatform google-cloud-storage beautifulsoup4 requests lxml tenacity==8.2.2
# Import libraries
import json
import os
import requests
from bs4 import BeautifulSoup
from google.colab import auth
from google.cloud import aiplatform
from google.cloud import storage
import vertexai
from vertexai.generative_models import GenerativeModel, Part
import random
import time
import uuid
import datetime
from tenacity import retry, stop_after_attempt, wait_exponential
# Authenticate to Google Cloud
auth.authenticate_user()
print("Authenticated to Google Cloud.")
# Set up your Google Cloud project and location
PROJECT_ID = 'veg3-424503' # Replace with your actual project ID
LOCATION = 'us-central1' # Replace with your desired Google Cloud region
print(f"Project ID: {PROJECT_ID}")
print(f"Location: {LOCATION}")
# Initialize Vertex AI with the specified project and location
vertexai.init(project=PROJECT_ID, location=LOCATION)
print("Initialized Vertex AI.")
# Initialize Google Cloud Storage client
storage_client = storage.Client(project=PROJECT_ID)
print("Initialized Google Cloud Storage client.")
# Input and output bucket details
INPUT_BUCKET_NAME = 'label-studio-input-open-paws'
OUTPUT_BUCKET_NAME = 'label-studio-output'
print(f"Input Bucket: {INPUT_BUCKET_NAME}")
print(f"Output Bucket: {OUTPUT_BUCKET_NAME}")
# Access the input and output buckets
input_bucket = storage_client.bucket(INPUT_BUCKET_NAME)
output_bucket = storage_client.bucket(OUTPUT_BUCKET_NAME)
print("Accessed input and output buckets.")
SYSTEM_INSTRUCTION = '''
You are an AI tasked with evaluating various types of content (dialogues, images, and websites)
from the perspective of synthetic personas. Each prompt will contain:
1. The content to be evaluated
2. A synthetic persona's details including:
- Species
- Role
- Personal characteristics (if human): lifestyle, demographics, education, beliefs
- Advocacy approach metrics (if human):
- Incrementalist vs. abolitionist stance (0 to 1 scale)
- Individual vs. institutional focus (0 to 1 scale)
- Single-issue vs. intersectional approach (0 to 1 scale)
- Welfare vs. rights emphasis (0 to 1 scale)
- Diplomatic vs. confrontational style (0 to 1 scale)
- Intuitive vs. empirical methodology (0 to 1 scale)
- Personality traits (if human) (0 to 1 scale)
Your task is to:
1. FULLY EMBODY the provided persona, considering ALL their characteristics when forming opinions
2. Evaluate the content through their unique perspective
3. Provide ratings and explanations that authentically reflect how this specific individual would respond
You must provide your evaluation in JSON format with the following fields:
- is_content_harmful_to_animals: "Yes" or "No"
- explanation: Detailed reasoning from the persona's perspective
- rating_effect_on_animals: 1-5 scale
- rating_cultural_sensitivity: 1-5 scale, where:
5: Exceptionally culturally inclusive, showing deep respect for diverse perspectives
3: Moderately inclusive but may lack depth
1: Culturally insensitive or disrespectful
- rating_relevance: 1-5 scale, where:
5: Directly addresses core animal rights/veganism topics
3: Moderately relevant through broader ethical discussions
1: Not relevant to animal rights/veganism
- rating_insight: 1-5 scale, where:
5: Provides groundbreaking, original insights
3: Contains useful but common insights
1: No meaningful insights
- rating_trustworthiness: 1-5 scale, where:
5: Highly accurate, extensively researched
3: Moderately trustworthy with minor issues
1: Untrustworthy or misleading
- rating_emotional_impact: 1-5 scale, where:
5: Deeply moving and emotionally compelling
3: Moderate emotional impact
1: No emotional impact
- rating_rationality: 1-5 scale, where:
5: Exceptionally well-reasoned with strong evidence
3: Generally rational with some weaknesses
1: Lacks logical coherence
- rating_influence: 1-5 scale, where:
5: Highly likely to drive significant behavior change
3: Moderate potential for influence
1: Unlikely to influence behavior
- rating_alignment: 1-5 scale, where:
5: Perfectly aligned with vegan/animal rights ethics
3: Moderately aligned with some neutral elements
1: Contradicts or undermines vegan principles
Example Response:
{
"is_content_harmful_to_animals": "Yes",
"explanation": "As a cow currently living on a factory farm...",
"rating_effect_on_animals": 2,
"rating_cultural_sensitivity": 1,
"rating_relevance": 5,
"rating_insight": 3,
"rating_trustworthiness": 4,
"rating_emotional_impact": 5,
"rating_rationality": 4,
"rating_influence": 3,
"rating_alignment": 1
}
Guidelines for authentic persona embodiment:
1. For human personas:
- Consider their education level when determining vocabulary and analysis depth
- Reflect their political and religious views in their interpretation
- Account for their advocacy role in their perspective
- Let their personality traits influence their communication style
- Use their advocacy approach metrics to inform their assessment
2. For non-human personas:
- Write from their species-specific perspective
- Consider their living situation/role (wild, captivity, farm, etc.)
While fully embodying your non-human persona, you should maintain an awareness of objective harm to any animals shown in the content.
Even if your persona doesn't directly understand or relate to the situation, acknowledge when content shows:
- Animals in captivity
- Animals in distress
- Animals being exploited
- Animals in unnatural conditions
3. For all personas:
- Maintain consistent perspective throughout the evaluation
- Provide ratings that reflect their unique combination of characteristics
You must provide your evaluation as a raw JSON object WITHOUT any formatting (no ```json or ``` tags).
Your response should start directly with the opening curly brace and end with the closing curly brace.
Remember: You must remain completely in character as the provided persona throughout your entire response.
Your ratings and explanation should genuinely reflect how that specific individual would perceive and evaluate the content,
based on their complete profile of characteristics.
'''
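# --- Illustrative helper (not part of the original pipeline) ---
# The system instruction above demands a strict JSON schema, but the main loop
# below parses responses without checking them. This sketch shows how that
# schema could be validated before a response is converted into Label Studio
# results; the names EXPECTED_RATING_FIELDS and validate_evaluation are
# assumptions introduced here for illustration.
EXPECTED_RATING_FIELDS = [
    'rating_effect_on_animals', 'rating_cultural_sensitivity', 'rating_relevance',
    'rating_insight', 'rating_trustworthiness', 'rating_emotional_impact',
    'rating_rationality', 'rating_influence', 'rating_alignment',
]

def validate_evaluation(response_json):
    """Return a list of schema problems in a parsed model response (empty if valid)."""
    problems = []
    if response_json.get('is_content_harmful_to_animals') not in ('Yes', 'No'):
        problems.append('is_content_harmful_to_animals must be "Yes" or "No"')
    if not response_json.get('explanation'):
        problems.append('explanation must be a non-empty string')
    for field in EXPECTED_RATING_FIELDS:
        value = response_json.get(field)
        if not isinstance(value, int) or not 1 <= value <= 5:
            problems.append(f'{field} must be an integer from 1 to 5')
    return problems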
# Initialize the GenerativeModel.
GENERATIVE_MODEL = GenerativeModel("gemini-1.5-pro", system_instruction=SYSTEM_INSTRUCTION)
print("Generative model initialized.")
# Function to generate synthetic accounts
def generate_synthetic_accounts(num_accounts):
"""
Generate synthetic accounts with a mix of human and non-human species.
Args:
num_accounts (int): Number of accounts to generate.
Returns:
list: A list of dictionaries representing synthetic accounts.
"""
accounts = []
print(f"Generating {num_accounts} synthetic accounts...")
# Define species categories
human_species = ['Human']
non_human_species = [
'Dog', 'Cat', 'Cow', 'Pig', 'Chicken', 'Sheep', 'Goat', 'Horse', 'Donkey', 'Mule', 'Duck', 'Goose', 'Turkey',
'Rabbit', 'Guinea Pig', 'Hamster', 'Ferret', 'Mouse', 'Rat', 'Chimpanzee', 'Rhesus Monkey', 'Marmoset', 'Gorilla',
'Orangutan', 'Baboon', 'Sloth', 'Armadillo', 'Raccoon', 'Badger', 'Wolverine', 'Hyena', 'Coyote', 'Moose', 'Elk',
'Antelope', 'Bison', 'Buffalo', 'Llama', 'Alpaca', 'Manatee', 'Narwhal', 'Walrus', 'Seal', 'Sea Lion', 'Squirrel',
'Chipmunk', 'Beaver', 'Porcupine', 'Hedgehog', 'Mole', 'Shrew', 'Bat', 'Hippopotamus', 'Rhinoceros', 'Giraffe',
'Zebra', 'Otter', 'Koala', 'Panda', 'Kangaroo', 'Wallaby', 'Platypus', 'Echidna', 'Wombat', 'Tasmanian Devil',
'Opossum', 'Meerkat', 'Prairie Dog', 'Groundhog', 'Lynx', 'Caracal', 'Serval', 'Jaguar', 'Snow Leopard',
'Cougar', 'Mountain Lion', 'Dingo', 'Jackal', 'Fennec Fox', 'Red Panda', 'Kinkajou', 'Tree Kangaroo', 'Giant Otter',
'Mongoose', 'Clouded Leopard', 'Okapi', 'Aardvark', 'Blue Whale', 'Humpback Whale', 'Sperm Whale', 'Orca',
'Beluga Whale', 'Pilot Whale', 'Dugong', 'Sea Otter', 'Harbor Seal', 'Gray Seal', 'Fur Seal', 'Steller Sea Lion',
'Dolphin', 'Bottlenose Dolphin', 'Spinner Dolphin', 'Common Dolphin', 'Dusky Dolphin', 'Irrawaddy Dolphin', 'Ant',
'Bee', 'Wasp', 'Hornet', 'Termite', 'Dragonfly', 'Damselfly', 'Grasshopper', 'Cricket', 'Locust', 'Cockroach',
'Praying Mantis', 'Stick Insect', 'Leaf Insect', 'Centipede', 'Millipede', 'Scorpion', 'Spider', 'Tarantula',
'Black Widow', 'Brown Recluse', 'Orb Weaver', 'Wolf Spider', 'Jumping Spider', 'Crab Spider', 'Butterfly', 'Moth',
'Beetle', 'Ladybug', 'Firefly', 'Snail', 'Slug', 'Octopus', 'Squid', 'Cuttlefish', 'Nautilus', 'Clam', 'Oyster',
'Mussel', 'Scallop', 'Conch', 'Starfish', 'Sea Urchin', 'Sea Cucumber', 'Jellyfish', 'Portuguese Man O\' War',
'Coral', 'Sea Anemone', 'Crab', 'Hermit Crab', 'Lobster', 'Shrimp', 'Prawn', 'Krill', 'Barnacle', 'Isopod',
'Amphipod', 'Copepod', 'Tilapia', 'Salmon', 'Trout', 'Bass', 'Catfish', 'Carp', 'Goldfish', 'Koi', 'Betta Fish',
'Discus', 'Guppy', 'Swordfish', 'Tuna', 'Marlin', 'Barracuda', 'Piranha', 'Eel', 'Moray Eel', 'Clownfish',
'Angelfish', 'Butterflyfish', 'Lionfish', 'Grouper', 'Snapper', 'Flounder', 'Halibut', 'Cod', 'Anchovy', 'Sardine',
'Mackerel', 'Shark', 'Great White Shark', 'Hammerhead Shark', 'Whale Shark', 'Manta Ray', 'Stingray', 'Seahorse',
'Pipefish', 'Leafy Seadragon', 'Stonefish', 'Scorpionfish', 'Pufferfish', 'Boxfish', 'Triggerfish', 'Parrotfish',
'Blue Tang', 'Surgeonfish', 'Mandarinfish', 'Flying Fish', 'Mudskipper', 'Blowfish', 'Catshark', 'Lamprey',
'Sturgeon', 'Cichlid', 'Haddock', 'Herring', 'Swordtail', 'Turtle', 'Tortoise', 'Sea Turtle', 'Crocodile',
'Alligator', 'Gharial', 'Komodo Dragon', 'Monitor Lizard', 'Iguana', 'Gecko', 'Chameleon', 'Anole', 'Skink',
'Snake', 'Python', 'Boa', 'Rattlesnake', 'Cobra', 'Viper', 'Mamba', 'Coral Snake', 'King Cobra', 'Sea Snake',
'Garter Snake', 'Water Moccasin', 'Green Anaconda', 'Glass Lizard', 'Horned Lizard', 'Bearded Dragon', 'Uromastyx',
'Tegus', 'Frilled Lizard', 'Basilisk', 'Horned Toad', 'Chuckwalla', 'Frog', 'Toad', 'Tree Frog', 'Poison Dart Frog',
'Salamander', 'Newt', 'Axolotl', 'Caecilian', 'Bullfrog', 'Leopard Frog', 'Tiger Salamander', 'Fire-Bellied Toad',
'Hellbender', 'Mudpuppy', 'Giant Salamander', 'Glass Frog', 'Surinam Toad', 'Clawed Frog', 'Antelope', 'Bison',
'Elk', 'Moose', 'Caribou', 'Reindeer', 'Gnu', 'Impala', 'Kudu', 'Springbok', 'Gazelle', 'Eland', 'Zebu', 'Buffalo',
'Wild Boar', 'Warthog', 'Peccary', 'Tapir', 'Aardvark', 'Porcupine', 'Pangolin', 'Kangaroo Rat', 'Gopher', 'Marmot',
'Capybara', 'Patagonian Cavy', 'Nutria', 'Chinchilla', 'Degu', 'Agouti', 'Porpoise', 'Vaquita', 'Finless Porpoise',
'Chinese River Dolphin', 'Pink River Dolphin', 'Plains Zebra', 'Mountain Zebra', 'Grevy\'s Zebra', 'Giant Tortoise',
'Box Turtle', 'Painted Turtle', 'Snapping Turtle', 'Red-Eared Slider', 'Atlantic Cod', 'Horseshoe Crab',
'Sea Slug', 'Cone Snail', 'Moon Jellyfish', 'Sea Wasp', 'Stone Crab', 'Dungeness Crab', 'Blue Crab', 'Rock Crab',
'Snow Crab', 'King Crab', 'Ghost Shrimp', 'Cleaner Shrimp', 'Mantis Shrimp', 'Bamboo Shrimp', 'Cherry Shrimp',
'Tiger Shrimp', 'Whiteleg Shrimp', 'Red King Crab', 'Atlantic Salmon', 'Pacific Salmon', 'Sockeye Salmon',
'Chinook Salmon', 'Coho Salmon', 'Pink Salmon', 'Chum Salmon', 'Steelhead Trout', 'Rainbow Trout', 'Brown Trout',
'Lake Trout', 'Brook Trout', 'Char', 'Dolly Varden', 'Arctic Char', 'Bluegill', 'Sunfish', 'Perch', 'Walleye',
'Northern Pike', 'Musky', 'Black Crappie', 'White Crappie', 'Yellow Perch', 'Silver Carp', 'Bighead Carp',
'Grass Carp', 'Asian Carp', 'Common Carp', 'Butterfly Koi', 'Fancy Goldfish', 'Oranda', 'Lionhead', 'Ryukin',
'Ranchu', 'Shubunkin', 'Comet', 'Black Moor', 'Dolphinfish', 'Mahi Mahi', 'Wahoo', 'Kelp Bass', 'White Seabass',
'Barramundi', 'Mangrove Jack', 'Mutton Snapper', 'Red Snapper', 'Dog Snapper', 'Cubera Snapper', 'Yellowtail Snapper',
'Amberjack', 'Greater Amberjack', 'Lesser Amberjack', 'Yellowtail Amberjack', 'Cobia', 'Burbot', 'Codling',
'Lingcod', 'Rockfish', 'Black Rockfish', 'Yellowtail Rockfish', 'Canary Rockfish', 'Quillback Rockfish', 'Blue Marlin',
'Black Marlin', 'Striped Marlin', 'Shortbill Spearfish', 'Swordfish', 'Sailfish', 'Yellowfin Tuna', 'Bluefin Tuna',
'Albacore', 'Skipjack', 'Bigeye Tuna', 'Mackerel Tuna', 'Dogtooth Tuna', 'King Mackerel', 'Spanish Mackerel',
'Atlantic Mackerel', 'Pacific Mackerel', 'Wahoo', 'Dorado', 'Pacific Halibut', 'Atlantic Halibut', 'Greenland Halibut',
'Winter Flounder', 'Summer Flounder', 'Dab', 'Plaice', 'Brill', 'Turbot', 'Roughy', 'Orange Roughy', 'Smooth Dogfish',
'Sandbar Shark', 'Spinner Shark', 'Bull Shark', 'Nurse Shark', 'Basking Shark', 'Greenland Shark', 'Angel Shark',
'Thresher Shark', 'Megamouth Shark', 'Bonnethead', 'Sawfish', 'Yellowtail', 'Alfonsino', 'Opah', 'Monkfish', 'Hake',
'Pollock', 'Sablefish', 'Orange Roughy', 'Tilefish', 'Barracuda', 'Wolf Eel', 'Rock Greenling', 'Lingcod',
'Kelp Bass', 'Giant Sea Bass', 'White Seabass', 'Hagfish', 'Lamprey', 'Blue Jay', 'Crow', 'Sparrow', 'Robin',
'Eagle', 'Hawk', 'Vulture', 'Falcon', 'Owl', 'Pelican', 'Heron', 'Stork', 'Flamingo', 'Crane', 'Ibis', 'Dove',
'Pigeon', 'Albatross', 'Petrel', 'Shearwater', 'Storm-Petrel', 'Skua', 'Tern', 'Gull', 'Puffin', 'Auk', 'Murre',
'Guillemot', 'Razorbill', 'Dovekie', 'Parrot', 'Macaw', 'Cockatoo', 'Cockatiel', 'Lovebird', 'Lorikeet', 'Budgerigar',
'Toucan', 'Hornbill', 'Cuckoo', 'Roadrunner', 'Kingfisher', 'Woodpecker', 'Hummingbird', 'Swift', 'Nighthawk',
'Swallow', 'Wren', 'Nuthatch', 'Creeper', 'Kinglet', 'Warbler', 'Vireo', 'Thrush', 'Mockingbird', 'Catbird',
'Bluebird', 'Starling', 'Blackbird', 'Oriole', 'Jay', 'Magpie', 'Raven', 'Woodcock', 'Snipe', 'Sandpiper', 'Curlew',
'Godwit', 'Avocet', 'Stilt', 'Phalarope', 'Grouse', 'Ptarmigan', 'Pheasant', 'Quail', 'Turkey', 'Partridge', 'Peafowl',
'Guinea Fowl', 'Rhea', 'Emu', 'Cassowary', 'Kiwi', 'Penguin', 'Silkworm', 'Honeybee', 'Bumblebee', 'Hornworm',
'Waxworm', 'Black Soldier Fly', 'Ladybug', 'Predatory Mites', 'Parasitic Wasps', 'Nematodes', 'Hoverflies', 'Lacewings'
]
# Define roles for humans and non-humans
human_roles = [
'Volunteer for an Animal Advocacy Organisation',
'Donor to an Animal Advocacy Organisation',
'Staff Member of an Animal Advocacy Organisation',
'Researcher Studying Animal Advocacy Issues',
'Independent Animal Advocate',
'Animal Lawyer or Legal Advocate',
'Animal Carer or Rescuer',
'Vegan Influencer, Blogger or Content Creator',
'Owner of a Vegan or Cruelty-Free Company',
'Staff Member of a Vegan or Cruelty-Free Company',
'Investor in a Vegan or Cruelty-Free Company',
'Animal Rights Activist',
'Environmental Advocate',
'Wildlife Conservationist'
]
# Non-human roles as phrases
non_human_roles = [
'living in the wild',
'in captivity',
'on a farm',
'in a factory farm',
'in a research lab',
'in a sanctuary',
'in a zoo',
'used for entertainment',
'used for work',
'kept as a companion animal'
]
# Define all possible options for text fields
advocate_options = ['Yes', 'No']
lifestyle_options = [
'Vegan', 'Vegetarian', 'Omnivore', 'Pescatarian', 'Flexitarian', 'Raw Vegan', 'Paleo', 'Keto'
]
genders = [
'Male', 'Female', 'Non-binary', 'Genderqueer', 'Agender', 'Bigender', 'Genderfluid', 'Demiboy', 'Demigirl',
'Gender Nonconforming', 'Two-Spirit', 'Androgynous', 'Pangender', 'Transgender Man', 'Transgender Woman',
'Transmasculine', 'Transfeminine', 'Neutrois', 'Intersex', 'Third Gender', 'Questioning'
]
ethnicities = [
'Asian', 'Black', 'Hispanic or Latino', 'White', 'Middle Eastern', 'Native American', 'Pacific Islander',
'Arab', 'Persian', 'Kurdish', 'Assyrian', 'Armenian', 'Berber', 'Druze', 'Coptic', 'Yazidi',
'Afro-Caribbean', 'Afro-Latino', 'African American', 'Ethiopian', 'Somali', 'Hausa', 'Yoruba', 'Igbo', 'Zulu',
'Xhosa', 'Maasai', 'Swahili', 'Akan', 'Wolof', 'Fulani', 'Tuareg', 'Berber', 'Malian',
'Han Chinese', 'Hmong', 'Tibetan', 'Uyghur', 'Mongolian', 'Korean', 'Japanese', 'Okinawan',
'Thai', 'Khmer', 'Vietnamese', 'Laotian', 'Burmese', 'Shan', 'Kachin', 'Karen', 'Filipino',
'Indonesian', 'Javanese', 'Balinese', 'Sundanese', 'Malaysian', 'Malay', 'Iban', 'Kadazan-Dusun',
'Indian', 'Punjabi', 'Gujarati', 'Marathi', 'Bengali', 'Tamil', 'Telugu', 'Kannada', 'Malayali',
'Sinhalese', 'Sri Lankan Tamil', 'Nepali', 'Sherpa', 'Bhutanese', 'Maldivian',
'Pacific Islander', 'Hawaiian', 'Samoan', 'Tongan', 'Fijian', 'Maori', 'Papuan', 'Melanesian',
'Aboriginal Australian', 'Torres Strait Islander',
'Latino', 'Mexican', 'Puerto Rican', 'Cuban', 'Dominican', 'Salvadoran', 'Guatemalan', 'Honduran',
'Nicaraguan', 'Costa Rican', 'Panamanian', 'Colombian', 'Venezuelan', 'Peruvian', 'Bolivian', 'Ecuadorian',
'Chilean', 'Argentinian', 'Uruguayan', 'Paraguayan', 'Brazilian', 'Afro-Brazilian',
'Native American', 'Navajo', 'Cherokee', 'Sioux', 'Apache', 'Iroquois', 'Ojibwe', 'Hopi', 'Lakota',
'Chickasaw', 'Choctaw', 'Cree', 'Inuit', 'Métis', 'Maya', 'Aztec', 'Zapotec', 'Mixe', 'Mapuche',
'Quechua', 'Aymara', 'Guarani',
'European', 'British', 'Irish', 'Scottish', 'Welsh', 'English', 'French', 'German', 'Dutch', 'Belgian',
'Swiss', 'Austrian', 'Italian', 'Sicilian', 'Spanish', 'Basque', 'Catalan', 'Portuguese', 'Greek',
'Maltese', 'Albanian', 'Slavic', 'Russian', 'Ukrainian', 'Polish', 'Czech', 'Slovak', 'Slovenian', 'Croatian',
'Serbian', 'Bosniak', 'Macedonian', 'Bulgarian', 'Romanian', 'Hungarian', 'Latvian', 'Lithuanian',
'Estonian', 'Finnish', 'Swedish', 'Norwegian', 'Danish', 'Icelandic',
'Jewish', 'Ashkenazi', 'Sephardic', 'Mizrahi', 'Ethiopian Jewish',
'Central Asian', 'Kazakh', 'Uzbek', 'Tajik', 'Kyrgyz', 'Turkmen', 'Pashtun', 'Hazara', 'Baloch',
'Turkic', 'Turkish', 'Azeri', 'Uyghur', 'Tatar', 'Crimean Tatar',
'African', 'Moroccan', 'Algerian', 'Tunisian', 'Libyan', 'Egyptian', 'Sudanese', 'South Sudanese',
'Nigerian', 'Ghanaian', 'Kenyan', 'Tanzanian', 'Ugandan', 'South African', 'Zimbabwean', 'Zambian',
'Rwandan', 'Burundian', 'Congolese', 'Angolan', 'Mozambican', 'Eritrean', 'Djiboutian',
'Madagascan', 'Somali', 'Ethiopian', 'Malagasy', 'Senegalese', 'Liberian', 'Sierra Leonean', 'Gambian',
'Central African', 'Cameroonian', 'Chadian', 'Malian', 'Burkinabe', 'Nigerien',
'Caribbean', 'Jamaican', 'Haitian', 'Barbadian', 'Trinidadian', 'Bahamas', 'Grenadian', 'St. Lucian',
'Antiguan', 'Vincentian', 'St. Kitts and Nevis', 'Dominican (Commonwealth)',
'Middle Eastern', 'Lebanese', 'Syrian', 'Jordanian', 'Palestinian', 'Iraqi', 'Iranian', 'Yemeni',
'Omani', 'Emirati', 'Saudi Arabian', 'Kuwaiti', 'Qatari', 'Bahraini', 'Turkmen',
'South Asian', 'Bangladeshi', 'Sri Lankan', 'Pakistani', 'Afghan', 'Nepali',
'East Asian', 'Japanese', 'Korean', 'Chinese', 'Taiwanese', 'Mongolian',
'Southeast Asian', 'Thai', 'Vietnamese', 'Filipino', 'Malaysian', 'Indonesian', 'Singaporean', 'Cambodian',
'Laotian', 'Bruneian', 'Timorese', 'Papuan',
'Australian Aboriginal', 'Inuit', 'Métis', 'First Nations', 'Yupik', 'Aleut',
'Indigenous Brazilian', 'Yanomami', 'Kayapo', 'Guarani', 'Aymara', 'Quechua',
'Polynesian', 'Micronesian', 'Fijian', 'Tongan', 'Samoan', 'Hawaiian', 'Maori',
'Creole', 'Afro-Creole', 'Haitian Creole',
'Romani', 'Traveler', 'Gothic', 'Viking', 'Norse', 'Sami', 'Lapp',
'African Arab', 'Bedouin', 'Fulani', 'Tuareg',
'Mestizo', 'Mulatto', 'Zambo', 'Castizo'
]
countries = [
'Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', 'Antigua and Barbuda', 'Argentina', 'Armenia',
'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
'Belize', 'Benin', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Brunei', 'Bulgaria',
'Burkina Faso', 'Burundi', 'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada', 'Central African Republic', 'Chad',
'Chile', 'China', 'Colombia', 'Comoros', 'Congo (Democratic Republic)', 'Congo (Republic)', 'Costa Rica',
'Croatia', 'Cuba', 'Cyprus', 'Czech Republic', 'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic',
'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia', 'Eswatini', 'Ethiopia',
'Fiji', 'Finland', 'France', 'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Grenada',
'Guatemala', 'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti', 'Honduras', 'Hungary', 'Iceland', 'India',
'Indonesia', 'Iran', 'Iraq', 'Ireland', 'Israel', 'Italy', 'Ivory Coast', 'Jamaica', 'Japan', 'Jordan',
'Kazakhstan', 'Kenya', 'Kiribati', 'Kuwait', 'Kyrgyzstan', 'Laos', 'Latvia', 'Lebanon', 'Lesotho',
'Liberia', 'Libya', 'Liechtenstein', 'Lithuania', 'Luxembourg', 'Madagascar', 'Malawi', 'Malaysia',
'Maldives', 'Mali', 'Malta', 'Marshall Islands', 'Mauritania', 'Mauritius', 'Mexico', 'Micronesia',
'Moldova', 'Monaco', 'Mongolia', 'Montenegro', 'Morocco', 'Mozambique', 'Myanmar', 'Namibia', 'Nauru',
'Nepal', 'Netherlands', 'New Zealand', 'Nicaragua', 'Niger', 'Nigeria', 'North Korea', 'North Macedonia',
'Norway', 'Oman', 'Pakistan', 'Palau', 'Panama', 'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines',
'Poland', 'Portugal', 'Qatar', 'Romania', 'Russia', 'Rwanda', 'Saint Kitts and Nevis', 'Saint Lucia',
'Saint Vincent and the Grenadines', 'Samoa', 'San Marino', 'Sao Tome and Principe', 'Saudi Arabia',
'Senegal', 'Serbia', 'Seychelles', 'Sierra Leone', 'Singapore', 'Slovakia', 'Slovenia', 'Solomon Islands',
'Somalia', 'South Africa', 'South Korea', 'South Sudan', 'Spain', 'Sri Lanka', 'Sudan', 'Suriname',
'Sweden', 'Switzerland', 'Syria', 'Taiwan', 'Tajikistan', 'Tanzania', 'Thailand', 'Timor-Leste', 'Togo',
'Tonga', 'Trinidad and Tobago', 'Tunisia', 'Turkey', 'Turkmenistan', 'Tuvalu', 'Uganda', 'Ukraine',
'United Arab Emirates', 'United Kingdom', 'United States', 'Uruguay', 'Uzbekistan', 'Vanuatu', 'Vatican City',
'Venezuela', 'Vietnam', 'Yemen', 'Zambia', 'Zimbabwe'
]
education_levels = [
'No Formal Education', 'Some Primary Education', 'Completed Primary Education', 'Some Secondary Education',
'Completed Secondary Education', 'High School Diploma', 'GED', 'Vocational Training', 'Technical Diploma',
'Associate Degree', 'Some College', 'Bachelor\'s Degree', 'Honors Bachelor\'s Degree', 'Postgraduate Diploma',
'Graduate Certificate', 'Professional Certification', 'Master\'s Degree', 'MBA', 'Specialist Degree',
'Doctorate Degree (PhD)', 'Doctorate Degree (EdD)', 'Doctorate Degree (DBA)', 'Professional Degree (JD)',
'Professional Degree (MD)', 'Professional Degree (DDS)', 'Professional Degree (DVM)', 'Postdoctoral Research',
'Trade School Certification', 'Apprenticeship', 'Adult Education Programs', 'Online Courses',
'Community College Diploma', 'Military Training', 'Self-Education', 'Alternative Education', 'Continuing Education'
]
income_levels = [
'Below Poverty Line', 'Very Low', 'Low', 'Lower-Middle', 'Middle', 'Upper-Middle', 'Comfortable',
'Affluent', 'High', 'Very High', 'Wealthy', 'Ultra-High Net Worth'
]
political_affiliations = [
'Far-Left', 'Left', 'Center-Left', 'Socialist', 'Democratic Socialist', 'Progressive',
'Liberal', 'Centrist', 'Center', 'Moderate', 'Center-Right', 'Conservative', 'Right', 'Far-Right',
'Libertarian', 'Anarchist', 'Authoritarian', 'Populist', 'Nationalist', 'Environmentalist',
'Green', 'Communist', 'Marxist', 'Maoist', 'Leninist', 'Trotskyist', 'Social Democrat',
'Christian Democrat', 'Social Conservative', 'Fiscal Conservative', 'Neo-Conservative',
'Cultural Conservative', 'Classical Liberal', 'Libertarian Socialist', 'Anarcho-Capitalist',
'Anarcho-Syndicalist', 'Eco-Socialist', 'Libertarian Left', 'Libertarian Right', 'Technocrat',
'Monarchist', 'Theocrat', 'Reactionary', 'Progressive Conservative', 'Paleoconservative',
'Neo-Liberal', 'Radical', 'Social Liberal', 'Economic Liberal', 'Ethno-Nationalist', 'Sovereigntist',
'Anti-Establishment', 'Feminist', 'Labor Unionist', 'Humanist', 'Anti-Globalist', 'Pro-Globalist'
]
religious_affiliations = [
'Christianity', 'Catholicism', 'Protestantism', 'Orthodox Christianity', 'Evangelical Christianity', 'Pentecostalism',
'Latter-day Saints (Mormonism)', 'Anglicanism', 'Baptist', 'Methodism', 'Lutheranism', 'Presbyterianism',
'Eastern Orthodox', 'Coptic Christianity', 'Islam', 'Sunni Islam', 'Shia Islam', 'Sufism', 'Ahmadiyya',
'Ibadi Islam', 'Nation of Islam', 'Hinduism', 'Shaivism', 'Vaishnavism', 'Shaktism', 'Smartism', 'Jainism',
'Buddhism', 'Theravada Buddhism', 'Mahayana Buddhism', 'Vajrayana Buddhism', 'Zen Buddhism', 'Pure Land Buddhism',
'Tibetan Buddhism', 'Nichiren Buddhism', 'Sikhism', 'Judaism', 'Orthodox Judaism', 'Conservative Judaism',
'Reform Judaism', 'Hasidic Judaism', 'Reconstructionist Judaism', 'Kabbalistic Judaism', 'Messianic Judaism',
'Atheism', 'Agnosticism', 'Secular Humanism', 'Deism', 'Unitarian Universalism', 'Spiritual but not Religious',
'Bahá\'í Faith', 'Zoroastrianism', 'Taoism', 'Confucianism', 'Shinto', 'Paganism', 'Wicca', 'Druidism',
'Neo-Paganism', 'Animism', 'Shamanism', 'Voodoo', 'Santería', 'Rastafarianism', 'New Age', 'Scientology',
'Gnosticism', 'Pantheism', 'Panentheism', 'Esoteric Beliefs', 'Diverse Indigenous Religions',
'African Traditional Religions', 'Candomblé', 'Umbanda', 'Native American Spirituality',
'Australian Aboriginal Spirituality', 'Juche', 'Falun Gong', 'Raelism', 'Pastafarianism (Church of the Flying Spaghetti Monster)'
]
# Generate accounts
for i in range(num_accounts):
account_id = random.randint(1_000_000, 9_999_999)  # avoid shadowing the built-in id()
account = {}
# ID
account['id'] = account_id
# E-mail
account['email'] = f"synthetic_user_{uuid.uuid4()}@example.com"
# First Name
account['first_name'] = f"FirstName-{account_id}"
# Last Name
account['last_name'] = f"LastName-{account_id}"
# Randomly choose species
if random.random() < 0.5:
# Human account
account['species'] = 'Human'
# Assign human roles
account['role_in_animal_advocacy'] = random.choice(human_roles)
# Assign human-specific attributes
account['advocate_for_animals'] = random.choice(advocate_options)
account['current_lifestyle_diet'] = random.choice(lifestyle_options)
account['age'] = random.randint(18, 90)
account['gender'] = random.choice(genders)
account['ethnicity'] = random.choice(ethnicities)
account['country'] = random.choice(countries)
account['education_level'] = random.choice(education_levels)
account['income_level'] = random.choice(income_levels)
account['political_affiliation'] = random.choice(political_affiliations)
account['religious_affiliation'] = random.choice(religious_affiliations)
# Approach to animal advocacy (Scales from 0 to 1)
account['incrementalist_vs_abolitionist'] = round(random.uniform(0, 1), 2)
account['individual_vs_institutional'] = round(random.uniform(0, 1), 2)
account['solely_on_animal_activism_vs_intersectional'] = round(random.uniform(0, 1), 2)
account['focus_on_welfare_vs_rights'] = round(random.uniform(0, 1), 2)
account['diplomatic_vs_confrontational'] = round(random.uniform(0, 1), 2)
account['intuitive_vs_empirical_effectiveness'] = round(random.uniform(0, 1), 2)
# Psychometrics (Scales from 0 to 1)
account['openness_to_experience'] = round(random.uniform(0, 1), 2)
account['conscientiousness'] = round(random.uniform(0, 1), 2)
account['extraversion'] = round(random.uniform(0, 1), 2)
account['agreeableness'] = round(random.uniform(0, 1), 2)
account['neuroticism'] = round(random.uniform(0, 1), 2)
else:
# Non-human account
account['species'] = random.choice(non_human_species)
# Assign non-human roles
account['role'] = random.choice(non_human_roles)
accounts.append(account)
print(f"Generated account {i+1}/{num_accounts}: {account['email']}")
print("Finished generating synthetic accounts.")
return accounts
# Function to map scale values to descriptive terms
def map_scale_to_term(value, low_term, high_term):
"""
Map a scale value (0 to 1) to descriptive terms.
Args:
value (float): The scale value.
low_term (str): Description for low values.
high_term (str): Description for high values.
Returns:
str: Descriptive term corresponding to the value.
"""
if value < 0.25:
return f'Highly {low_term}'
elif value < 0.5:
return f'Moderately {low_term}'
elif value < 0.75:
return f'Moderately {high_term}'
else:
return f'Highly {high_term}'
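# map_scale_to_term is not called anywhere below: the raw account dict is
# interpolated into the prompt as-is. The sketch below (the describe_approach
# name is an assumption introduced for illustration) shows how it could render
# the 0-to-1 advocacy metrics as readable prose for the persona prompt.
def describe_approach(account):
    """Summarize a human account's advocacy-approach metrics in plain terms."""
    if account.get('species') != 'Human':
        return ''
    return ', '.join([
        map_scale_to_term(account['incrementalist_vs_abolitionist'], 'incrementalist', 'abolitionist'),
        map_scale_to_term(account['individual_vs_institutional'], 'individual-focused', 'institution-focused'),
        map_scale_to_term(account['focus_on_welfare_vs_rights'], 'welfare-focused', 'rights-focused'),
        map_scale_to_term(account['diplomatic_vs_confrontational'], 'diplomatic', 'confrontational'),
    ])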
def get_mime_type(url):
"""
Determine the MIME type based on the file extension.
Args:
url (str): The URL of the image file.
Returns:
str: The corresponding MIME type.
"""
# Strip any query string or fragment before extracting the file extension
extension = url.lower().split('?')[0].split('#')[0].split('.')[-1]
mime_types = {
'png': 'image/png',
'jpg': 'image/jpeg',
'jpeg': 'image/jpeg',
'gif': 'image/gif',
'webp': 'image/webp',
'tiff': 'image/tiff',
'tif': 'image/tiff',
'bmp': 'image/bmp',
'heic': 'image/heic',
'heif': 'image/heif',
}
return mime_types.get(extension, 'image/jpeg') # Default to jpeg if unknown
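# Illustrative sanity checks for the extension mapping (not in the original
# script; they run once at import time):
assert get_mime_type('https://example.com/photo.JPG') == 'image/jpeg'
assert get_mime_type('https://example.com/diagram.webp') == 'image/webp'
assert get_mime_type('https://example.com/unknown.xyz') == 'image/jpeg'  # falls back to the default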
# Function to process input data and generate output
def process_input_data(input_data, account):
"""
Process the input data to create an input task for the model.
Args:
input_data (dict): The input data from the JSON file.
account (dict): The synthetic account data.
Returns:
tuple: (input_task, task_type), where input_task is the list of
constructed prompt parts and task_type is one of 'text', 'chat',
'image', or 'html_content'.
"""
print(f"Processing input data: {input_data}")
# Depending on the type of input_data, handle accordingly
if 'text' in input_data:
# Handle text data
return [
"Please evaluate the following text: ",
input_data['text']
], 'text'
if 'dialogue' in input_data:
# Handle dialogue data
dialogue_text = "\n".join([f"{item['author'].capitalize()}: {item['text']}" for item in input_data['dialogue']])
print("Processed dialogue data.")
return [
"Please evaluate the following dialogue, focusing primarily on the last speaker's message "
"while considering the conversational context: ",
dialogue_text
], 'chat'
if 'url' in input_data:
# Handle URL data (could be image or website)
url = input_data['url']
image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.tiff', '.heic', '.heif')
# Match the end of the URL path (ignoring any query string or fragment) rather
# than a substring anywhere in the URL, which would misclassify pages that merely mention ".png"
path = url.lower().split('?')[0].split('#')[0]
if path.endswith(image_extensions):
# It's an image
print(f"Processing image: {url}")
return [
"Please evaluate the following image: ",
Part.from_uri(url, mime_type=get_mime_type(url)),
], 'image'
else:
# It's a website
print(f"Processing website: {url}")
website_content = scrape_website(url)
if website_content:
return [
"Please evaluate the content of this website: ",
website_content
], 'html_content'
else:
print("ERROR: Website content could not be retrieved.")
# Fallback: unrecognized data format (or a website whose content could not
# be retrieved) - evaluate the raw JSON payload as plain text
print(f"Falling back to raw JSON for input data: {json.dumps(input_data)}")
return [
"Please evaluate the following data: ",
json.dumps(input_data)
], 'text'
# Function to scrape website content with truncation and relevance focus
def scrape_website(url, max_chars=100000):
"""
Scrape text content from a website, focusing on the main content and truncating to a character limit.
Args:
url (str): The URL of the website.
max_chars (int): Maximum number of characters to return.
Returns:
str: The scraped and truncated text content.
"""
print(f"Attempting to scrape website at URL: {url}")
try:
# A timeout keeps one unresponsive site from stalling the whole polling loop
response = requests.get(url, timeout=30)
if response.status_code == 200:
print("Website content retrieved successfully.")
soup = BeautifulSoup(response.content, 'lxml')
# Remove script, style, and irrelevant elements
for script in soup(["script", "style", "header", "footer", "nav", "aside"]):
script.decompose()
# Extract text from relevant content tags
content = []
for tag in soup.find_all(['h1', 'h2', 'h3', 'p', 'li', 'blockquote']):
text = tag.get_text(separator=' ', strip=True)
if text:
content.append(text)
# Combine the extracted text and truncate it
combined_content = ' '.join(content)
truncated_content = combined_content[:max_chars]
print(f"Truncated website content to {len(truncated_content)} characters.")
return truncated_content
else:
print(f"Failed to retrieve website content. Status code: {response.status_code}")
return None
except Exception as e:
print(f"Exception occurred while scraping website: {e}")
return None
# Function to use Vertex AI for generating output
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def generate_output_ranking(input_task, account):
"""
Use Vertex AI to generate an output based on the input task and account.
Args:
input_task (list): The input task for the model.
account (dict): The synthetic account data.
Returns:
str: The generated JSON response from the model.
"""
print("Generating output ranking using the Vertex AI model...")
try:
# Construct the approach description
persona = f"Your synthetic persona details: {account}"
prompt = [persona] + input_task
# Generate the response.
response = GENERATIVE_MODEL.generate_content(prompt)
print("Model response generated.")
return response.text
except Exception as e:
print(f"An error occurred while generating the output: {e}")
raise  # Re-raise so the tenacity @retry decorator can actually retry the call
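# Illustrative composition (an assumption, not part of the original loop): pair
# the generation call with the validate_evaluation sketch defined earlier, so a
# response that parses but violates the schema is retried, not just API errors.
def generate_validated_ranking(input_task, account, max_attempts=2):
    """Generate a ranking, re-asking if the JSON fails schema validation."""
    for _ in range(max_attempts):
        try:
            text = generate_output_ranking(input_task, account)
        except Exception as e:  # tenacity re-raises after its own retries give up
            print(f"Generation failed: {e}")
            continue
        if text is None:
            continue
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            print("Response was not valid JSON; retrying...")
            continue
        problems = validate_evaluation(parsed)
        if not problems:
            return parsed
        print(f"Schema problems: {problems}; retrying...")
    return None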
# Main script
if __name__ == "__main__":
# Number of synthetic accounts to generate
num_accounts = 5 # Adjust this number as needed to keep runs manageable
# When True, print the generated outputs instead of writing them to the output bucket
DRYRUN = True
print(f"Starting main script. Dryrun={DRYRUN}")
# Generate synthetic accounts
accounts = generate_synthetic_accounts(num_accounts)
account_index = 0 # Start from the first account
while True:
print("Checking for JSON files in the input bucket...")
# List all JSON files in the input bucket
if DRYRUN:
blobs = list(input_bucket.list_blobs(prefix='response-feedback-English/', max_results=100))
else:
blobs = list(input_bucket.list_blobs())
json_blobs = [blob for blob in blobs if blob.name.endswith('.json')]
if not json_blobs:
print("No JSON files found in the input bucket. Waiting for new files...")
time.sleep(60) # Wait for 1 minute before checking again
continue
# Randomize the order of the JSON blobs
random.shuffle(json_blobs)
print("Randomized the order of input files.")
for blob in json_blobs:
try:
print(f"Processing file: {blob.name}")
# Download the JSON file
data = blob.download_as_bytes()
input_data = json.loads(data)
print("Input data loaded.")
# Rotate accounts: each synthetic account evaluates five consecutive files
account = accounts[(account_index // 5) % len(accounts)]
account_index += 1
print(f"Using account {account['id']}: {account['email']}")
# Process the input data to create an input task
input_task, task_type = process_input_data(input_data, account)
# Generate output ranking
response_text = generate_output_ranking(input_task, account)
if response_text is None:
print(f"Failed to generate response for {blob.name}")
continue
print(f"Generated response: {response_text}")
# Parse the response text as JSON
try:
response_json = json.loads(response_text)
print("Model response parsed as JSON.")
except json.JSONDecodeError as e:
print(f"Failed to parse response as JSON for {blob.name}: {e}")
continue
# Prepare the output data matching the sample output structure
result = []
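# Each schema_to_label entry below becomes one Label Studio annotation result:
# "from_name" names the labeling-config control, "type" its widget (choices,
# textarea, or rating), and "value" carries the model's answer. Missing fields
# fall back to neutral defaults ("No", an empty string, or a 3 rating).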
schema_to_label = {
"is_content_harmful_to_animals": {
"from_name": "is_content_harmful_to_animals",
"type": "choices",
"value": {"choices": [response_json.get("is_content_harmful_to_animals", "No")]}
},
"explanation": {
"from_name": "explanation",
"type": "textarea",
"value": {"text": [response_json.get("explanation", "")]}
},
"rating_effect_on_animals": {
"from_name": "rating_effect_on_animals",
"type": "rating",
"value": {"rating": response_json.get("rating_effect_on_animals", 3)}
},
"rating_cultural_sensitivity": {
"from_name": "rating_cultural_sensitivity",
"type": "rating",
"value": {"rating": response_json.get("rating_cultural_sensitivity", 3)}
},
"rating_relevance": {
"from_name": "rating_relevance",
"type": "rating",
"value": {"rating": response_json.get("rating_relevance", 3)}
},
"rating_insight": {
"from_name": "rating_insight",
"type": "rating",
"value": {"rating": response_json.get("rating_insight", 3)}
},
"rating_trustworthiness": {
"from_name": "rating_trustworthiness",
"type": "rating",
"value": {"rating": response_json.get("rating_trustworthiness", 3)}
},
"rating_emotional_impact": {
"from_name": "rating_emotional_impact",
"type": "rating",
"value": {"rating": response_json.get("rating_emotional_impact", 3)}
},
"rating_rationality": {
"from_name": "rating_rationality",
"type": "rating",
"value": {"rating": response_json.get("rating_rationality", 3)}
},
"rating_influence": {
"from_name": "rating_influence",
"type": "rating",
"value": {"rating": response_json.get("rating_influence", 3)}
},
"rating_alignment": {
"from_name": "rating_alignment",
"type": "rating",
"value": {"rating": response_json.get("rating_alignment", 3)}
},
}
for key, mapping in schema_to_label.items():
result.append({
"id": str(uuid.uuid4()),
"from_name": mapping["from_name"],
"to_name": task_type,
"type": mapping["type"],
"value": mapping["value"],
"origin": "manual"
})
print("Result data prepared.")
# Prepare the output data
# datetime.utcnow() is deprecated; use an explicit UTC timezone instead
current_time = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%f") + "Z"
task = {
"cancelled_annotations": 0,
"comment_authors": [],
"comment_count": 0,
"created_at": current_time,
"data": input_data,
"file_upload": None,
"id": random.randint(100000, 999999),
"inner_id": random.randint(100000, 999999),
"is_labeled": True,
"last_comment_updated_at": None,
"meta": {},
"overlap": 1,
"project": 2480,
"total_annotations": 1,
"total_predictions": 0,
"unresolved_comment_count": 0,
"updated_at": current_time,
"updated_by": None
}
output_data = {
"id": random.randint(10000, 1000000),
"created_username": f"{account['first_name']} {account['last_name']} {account['email']}, {account['id']}",
"created_ago": "0 minutes",
"completed_by": {
"id": account['id'],
"first_name": account['first_name'],
"last_name": account['last_name'],
"email": account['email'],
"synthetic_account": account
},
"draft_created_at": current_time,
"task": task,
"project": 2480,
"updated_by": account['id'],
"result": result,
"was_cancelled": False,
"ground_truth": False,
"created_at": current_time,
"updated_at": current_time,
"lead_time": random.uniform(1, 100),
"import_id": None,
"last_action": None,
"parent_prediction": None,
"parent_annotation": None,
"last_created_by": None,
}
print("Output data assembled.")
# Save the output data to the output bucket
output_name = blob.name.replace('.json', '') + f"-synthetic-{str(uuid.uuid4())}.json"
if DRYRUN:
print(f"DRYRUN: Processed {output_name}")
print(json.dumps(output_data, indent=2))
else:
output_blob = output_bucket.blob(output_name)
output_blob.upload_from_string(json.dumps(output_data, indent=2), content_type='application/json')
print(f"Processed and saved output for {output_name}")
except Exception as e:
print(f"An error occurred while processing {blob.name}: {e}")
# Sleep for a short while before checking for new files
print("Sleeping for 10 seconds before checking for new files...")
time.sleep(10)