-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcommands.txt
34 lines (23 loc) · 2.34 KB
/
commands.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# --- GAMES: scrape the browse listing ---------------------------------------
# Number of browse pages to fetch; pages are numbered from 1.
n_pages=920

# Print the per-page TSV files of directory $1 for pages 1..$2, in numeric
# page order. A plain `cat dir/*` glob sorts names as strings (1, 10, 100,
# 101, ...), which scrambles the page order in the combined list.
# Missing pages are reported by cat on stderr instead of being skipped silently.
concat_browse_pages() {
  local dir=$1 last=$2
  local page
  local -a files=()
  for ((page = 1; page <= last; page++)); do
    files+=("${dir}/${page}.tsv")
  done
  cat -- "${files[@]}"
}

# The generated jobs redirect into browse_pages/, so it has to exist first.
mkdir -p browse_pages/
for ((i = 1; i <= n_pages; i++)); do
  echo "python browse_games.py ${i} > browse_pages/${i}.tsv"
done > run.browse
cluster_job.py -i run.browse -o clust -nl 10 -f -Q
# Header: rank, figure, href, name, year, geek_rate, avg_rate, num_voters, market
concat_browse_pages browse_pages "${n_pages}" > game_list.tsv
# Only discard the per-page files once the combined list actually has content.
if [[ -s game_list.tsv ]]; then
  rm -r clust/ browse_pages/
fi
# --- GAMES: fetch per-game details in chunks --------------------------------
# Print one retrieve_game_info.py job per chunk of the game list.
#   $1  TSV game list; the id is the 3rd "/"-separated piece of column 3 (href)
#   $2  number of games per job (default 500)
# Chunk k covers lines k*size+1 .. (k+1)*size, so no line is requested twice.
# (`tail -n +K` starts AT line K, so slicing with K = k*size repeated line
# `size` in two chunks.) No job is emitted for an empty remainder.
# Returns non-zero if the list cannot be read or the chunk size is not positive.
emit_retrieve_jobs() {
  local list=$1 size=${2:-500}
  local total first last ids chunk=0
  ((size > 0)) || return 2
  # awk counts a final line even when it lacks a trailing newline.
  total=$(awk 'END { print NR }' "$list") || return
  for ((first = 1; first <= total; first += size)); do
    last=$((first + size - 1))
    ids=$(sed -n "${first},${last}p;${last}q" "$list" \
      | cut -f3 \
      | cut -f3 -d/ \
      | paste -sd ',' -)
    echo "python retrieve_game_info.py ${ids} > json.chunks/${chunk}.json"
    chunk=$((chunk + 1))
  done
}

mkdir -p json.chunks/
emit_retrieve_jobs game_list.tsv 500 > run.retrieve
cluster_job.py -i run.retrieve -o clust -nl 100 -f -q rg-el7,short-sl7 -Q
# --- GAMES: merge chunk JSON, tabulate, attach year -------------------------
# Splice the per-chunk JSON objects into a single object: on every line drop a
# leading "{" and turn a trailing "}" into ","; remove the last character of
# the final line; then wrap everything in one "{" ... "}" pair.
# (Uses the GNU sed one-line forms of the i and a commands.)
cat json.chunks/*.json \
  | sed -e 's/^[{]//' -e 's/[}]$/,/' -e '$s/.$//' -e '1i{' -e '$a}' \
  > mdata.games.json

# Pretty-printed copy of the merged JSON.
python -m json.tool < mdata.games.json > mdata.games.format.json

# Tabular version of the merged JSON.
python json2table.py mdata.games.json > mdata.games.tsv

# Feed "<3rd /-separated piece of column 3> TAB <column 5>" from the game list
# to join.py on stdin, join against the games table, and rename the first "V1"
# on the header line to "year".
awk -F '\t' -v OFS='\t' '{ split($3, href, "/"); print href[3], $5 }' game_list.tsv \
  | python join.py -a stdin -b mdata.games.tsv --b_header \
  | sed '1s/V1/year/' \
  > mdata.games.year.tsv

# Compressed copy; the uncompressed table is kept.
gzip -c < mdata.games.year.tsv > mdata.games.year.tsv.gz
# USERS
# Print one retrieve_users_info.py job per user name read from stdin (one name
# per line); each job redirects its output to "$1/<name>.json".
emit_user_jobs() {
  local out_dir=$1 name
  # IFS= and -r keep the name exactly as read (no trimming, no backslash eating).
  while IFS= read -r name; do
    printf 'python retrieve_users_info.py %s > "%s/%s.json"\n' \
      "$name" "$out_dir" "$name"
  done
}

# Print the rows of user list $2 whose first column is NOT the user (3rd word)
# of any job line in job file $1. The comparison is on the whole name; a
# substring filter such as `grep -F -v -f` would also drop "bobby" once "bob"
# has been queued.
users_not_queued() {
  local job_file=$1 list=$2
  awk -v job_file="$job_file" '
    BEGIN {
      while ((getline line < job_file) > 0) {
        split(line, word, " ")
        queued[word[3]] = 1
      }
      close(job_file)
    }
    !($1 in queued)
  ' "$list"
}

# The generated jobs redirect into these directories, so create them up front.
mkdir -p users/ users1/

# users.list.txt: "<name> TAB <count>", built from the lines of the formatted
# games JSON that contain "username" (name = text between the last two quotes).
grep username mdata.games.format.json \
  | awk -F '"' '{ print $(NF-1) }' \
  | sort \
  | uniq -c \
  | awk '{ print $2 "\t" $1 }' \
  > users.list.txt

# First batch: jobs for a random sample of at most 10000 users.
# NOTE(review): this log does not show run.users / run.users1 being submitted;
# presumably that is done separately (e.g. with cluster_job.py) -- confirm.
shuf users.list.txt | head -10000 | cut -f1 | emit_user_jobs users > run.users

# Re-run, in the background, every first-batch job whose output file is empty.
# find replaces the interactive-only `ll` alias; the quoted path is matched
# literally against the job lines so only the exact output file is selected.
find users -type f -size 0 \
  | sed 's/.*/"&"/' \
  | grep -F -f - run.users \
  | bash 2> /dev/null &

# Second batch: up to 10000 more users that are not part of the first batch.
users_not_queued run.users users.list.txt \
  | shuf \
  | head -10000 \
  | cut -f1 \
  | emit_user_jobs users1 \
  > run.users1

# Let the background re-run finish before anything reads users/.
wait
# --- USERS: merge per-user JSON and tabulate --------------------------------
# Splice every per-user JSON object found under users*/ into one object: strip
# a leading "{" and turn a trailing "}" into "," on each line, drop the last
# character of the final line, then wrap the result in a single "{" ... "}".
# (Uses the GNU sed one-line forms of the i and a commands.)
cat users*/*json \
  | sed -e 's/^[{]//;s/[}]$/,/;$s/.$//' -e '1i{' -e '$a}' \
  > mdata.users.json

# Pretty-printed copy of the merged JSON.
python -m json.tool < mdata.users.json > mdata.users.format.json

# Tabulate the merged users JSON. Its stdout is not redirected here, so the
# script presumably writes its own output files -- confirm in json2table.users.py.
python json2table.users.py mdata.users.json
# Add the country information to the comments. Take only users for which there is country info
#
# Reads:  country.dict.tsv    column 1 -> column 2 lookup
#         mdata.users.tsv     column 2 = country, column 7 = key matched against
#                             column 3 of the comments, column 8 = alternative
#                             place used for "United States" rows
#         comments.users.tsv  tab-separated, first line is the header
# Writes: comments.users.country.tsv = header plus a "country" column, then
#         every comment row whose column 3 resolved to a place, with the
#         dictionary value of that place appended.
awk -F '\t' -v OFS='\t' \
    -v users_tsv="mdata.users.tsv" -v dict_tsv="country.dict.tsv" '
  BEGIN {
    # Load the dictionary.
    while ((getline row < dict_tsv) > 0) {
      split(row, col, "\t")
      country[col[1]] = col[2]
    }

    # Resolve one place per users row; rows whose country is "NA" or is not
    # in the dictionary are ignored.
    while ((getline row < users_tsv) > 0) {
      split(row, col, "\t")
      if (col[2] == "NA" || country[col[2]] == "")
        continue
      place = col[2]
      # For "United States", prefer column 8 when it is known to the dictionary.
      if (col[2] == "United States" && col[8] != "NA" && country[col[8]] != "")
        place = col[8]
      d[col[7]] = place
    }
  }

  # Header line: append the new column name.
  NR == 1 { print $0, "country"; next }

  # Data lines: keep only rows with a non-empty column 3 that resolved to a place.
  d[$3] != "" && $3 != "" { print $0, country[d[$3]] }
' comments.users.tsv > comments.users.country.tsv