-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path00getprojbyid.pl
47 lines (41 loc) · 977 Bytes
/
00getprojbyid.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#! /usr/bin/env perl
# script to get BioProject by ID
# STDIN: list of IDs
# STDOUT: API output
# usage: perl 00getprojbyid.pl < id_list_unique.txt | pigz -c > out.json.gz
use strict;
use warnings;

# Number of IDs sent to the API in one request.
my $rows = 1000;
# Seconds gethoge() pauses after each request.
my $sleep = 1;
# File holding the address of the DBCLS SRA API.
my $ip_file = "IP.txt";
# API host; gethoge() interpolates it into the request URL.
my $apip;
# IDs collected for the current batch, each already wrapped in double quotes.
my @ids;

# get IP of DBCLS SRA API: the last non-blank line of the file wins
open(my $ip_fh, '<', $ip_file) or die "Cannot open $ip_file: $!";
while (my $line = <$ip_fh>) {
    $line =~ s/\A\s+|\s+\z//g;    # drop newline, CR and surrounding blanks
    next if $line eq '';
    $apip = $line;
}
close $ip_fh;
# Without this check an empty file would leave the host undefined and every
# request would go to a meaningless URL.
die "No API address found in $ip_file\n" unless defined $apip;

# read list of IDs (BioProject), one per line, and query them in batches
while (my $id = <STDIN>) {
    $id =~ s/\s+\z//;             # strip the line ending (LF or CRLF)
    next if $id eq '';            # a blank line is not an ID
    $id =~ s/(["\\])/\\$1/g;      # keep the JSON string literal well-formed
    push @ids, "\"$id\"";
    if (@ids >= $rows) {
        gethoge(join(',', @ids));
        @ids = ();
    }
}

# Flush the final partial batch. Skipped when nothing is pending, so no
# request with an empty "ids" array is sent when the number of IDs is an
# exact multiple of $rows or when STDIN is empty.
gethoge(join(',', @ids)) if @ids;
# Query the BioProject API for one batch of IDs and print the response.
#   $_[0]: comma-separated list of already double-quoted IDs; it is placed
#          verbatim inside the "ids" array of the JSON request body.
# Reads the file-level $apip (API host) and $sleep (pause in seconds).
# Output: every response line is printed to STDOUT with a newline inserted
# before each '{"Package' and one more newline appended after the line.
# Dies if curl cannot be started; warns on STDERR if curl exits non-zero.
sub gethoge {
    my ($id_list) = @_;
    my $payload = "{\"ids\":[$id_list]}";

    # List-form pipe open runs curl directly, without a shell, so quotes or
    # other shell metacharacters in the IDs cannot alter the command line.
    open(my $curl, '-|', 'curl', '-s', '-X', 'POST', '-d', $payload,
        "http://$apip/api/bioproject")
        or die "Cannot open API: $!\n";

    # A named lexical is used instead of $_ so the caller's $_ (it is in the
    # middle of its own input loop) is left untouched.
    while (my $line = <$curl>) {
        # start each '{"Package' record on a line of its own
        $line =~ s/\{"Package/\n{"Package/g;
        print "$line\n";
    }

    # close() on a pipe returns false when the child exited non-zero; report
    # it instead of silently emitting a truncated or empty response.
    unless (close $curl) {
        warn $!
            ? "Error closing curl pipe: $!\n"
            : 'curl exited with status ' . ($? >> 8) . "\n";
    }

    # throttle: pause before the caller issues the next request
    sleep $sleep;
    return;
}