Skip to content

Commit

Permalink
Update Virtuoso data upload scripts for PDB/RDF, chem_comp/RDF, PRD/R…
Browse files Browse the repository at this point in the history
…DF, and VRPT/RDF(-alt)
  • Loading branch information
Masashi Yokochi committed Sep 24, 2021
1 parent 3c7a600 commit ec1670a
Show file tree
Hide file tree
Showing 7 changed files with 390 additions and 8 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ File path | Document
- Add resources for PRD/RDF.
- Fix stylesheets to remove category element that has no children. (PDB ID: 7lqs)
- Fix XSD data type of '_pdbx_em_volume_estimate_marker.enclosed_volume' and '_pdbx_em_density_distribution_marker.map_value' (xsd:double).
- Update Virtuoso data upload scripts for PDB/RDF, chem_comp/RDF, PRD/RDF, and VRPT/RDF(-alt).

- **Sep 17, 2021**: Release v4.2.0
- Integration of [SIFTS](https://www.ebi.ac.uk/pdbe/docs/sifts/) into PDB/RDF that includes HTML links to GO, InterPro, Pfam, CATH domain, SCOP/SCOP2/SCOP2B, and Ensembl from '_pdbx_sifts_xref_db_segments' category.
Expand Down
126 changes: 126 additions & 0 deletions virtuoso_scripts/update_virtuoso_cc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#!/bin/bash
#
# Load the chem_comp/RDF files (*.rdf.gz under $RDF_CC) into the Virtuoso
# graph http://rdf.wwpdb.org/cc using ld_dir() / rdf_loader_run().
#
# Usage: run from the directory that contains ./scripts and
#        ./virtuoso_scripts (both are referenced by relative path).
#
# Variables expected from the sourced env files:
#   MAXPROCS, RDF_CC, RDF_CC_LINK,
#   VIRTUOSO_DB_PORT, VIRTUOSO_DB_USER, VIRTUOSO_DB_PASS
#
# On success the current UTC date is written to /tmp/cc-virtuoso-last and
# the Virtuoso server is shut down.

source ./scripts/env.sh
source ./virtuoso_scripts/virtuoso_env.sh

# Fail early with a clear message instead of running destructive commands
# (rm -rf, cd) with empty paths.
: "${MAXPROCS:?MAXPROCS is not set}"
: "${RDF_CC:?RDF_CC is not set}"
: "${RDF_CC_LINK:?RDF_CC_LINK is not set}"
: "${VIRTUOSO_DB_PORT:?VIRTUOSO_DB_PORT is not set}"
: "${VIRTUOSO_DB_USER:?VIRTUOSO_DB_USER is not set}"
: "${VIRTUOSO_DB_PASS?VIRTUOSO_DB_PASS is not set}"

# Run isql against the configured Virtuoso instance.
# NOTE(review): the password is passed on the command line and is therefore
# visible in the process list.
run_isql() {
  isql "$VIRTUOSO_DB_PORT" "$VIRTUOSO_DB_USER" "$VIRTUOSO_DB_PASS" "$@"
}

# Echo a SQL command, execute it, and abort the whole script if isql fails.
# stderr of isql is captured in the file named by $err.
exec_sql() {
  local sql=$1

  echo "$sql"

  if ! run_isql exec="$sql" 2> "$err"; then
    cat "$err" >&2
    exit 1
  fi
}

# Use only a fraction (1/2.5, truncated) of the configured process count
# for the parallel loaders, but always at least one.
MAXPROCS=$(echo "scale=0; $MAXPROCS / 2.5" | bc)

if [[ -z "$MAXPROCS" || "$MAXPROCS" == 0 ]]; then
  MAXPROCS=1
fi

DB_NAME=cc
last_marker=/tmp/${DB_NAME}-virtuoso-last

rm -f "$last_marker"

init=false
# Number of RDF files modified within the last 4 days.
change=$(find "$RDF_CC" -name '*.rdf.gz' -mtime -4 | wc -l)

if ! command -v isql > /dev/null 2>&1; then
  echo "isql: command not found..."
  echo "Please install Virtuoso (https://virtuoso.openlinksw.com/)."
  exit 1
fi

./virtuoso_scripts/start_virtuoso.sh || exit 1

# Presumably gives the server time to finish starting up.
sleep 180

GRAPH_URI=http://rdf.wwpdb.org/$DB_NAME

graph_exist=$(./virtuoso_scripts/ask_graph_existance.sh "$GRAPH_URI") || exit 1

if [[ "$graph_exist" == 1 && "$init" == "false" ]]; then

  if (( change == 0 )); then
    echo "$DB_NAME" is update.
  fi

  # NOTE(review): an existing graph is never reloaded unless 'init' is set
  # to something other than "false", even when files changed. Confirm this
  # is intended (the exit may have been meant to sit inside the check above).
  exit 0

fi

echo
echo "Do you want to update Virtuoso DB ($GRAPH_URI)? (y [n]) "

read -r ans

case "$ans" in
  y* | Y*) ;;
  *)
    echo skipped.
    exit 1
    ;;
esac

run_isql exec="status();" || exit 1

# Rebuild a flat directory of symlinks to every RDF file; ld_dir() is
# pointed at this directory below.
rm -rf -- "${RDF_CC_LINK:?}"
mkdir -p -- "$RDF_CC_LINK" || exit 1

cd -- "$RDF_CC_LINK" || exit 1

# The source path is resolved relative to the link directory, as in the
# original layout.
while IFS= read -r rdf_file; do
  ln -s -- "$rdf_file" .
done < <(find "../$RDF_CC" -type f -iname "*.rdf.gz")

err=${DB_NAME}_err

if [[ "$graph_exist" == 1 ]]; then
  exec_sql "log_enable(3,1); SPARQL CLEAR GRAPH <$GRAPH_URI>;"
  exec_sql "log_enable(3,1); DELETE FROM rdf_quad WHERE g = iri_to_id ('$GRAPH_URI');"
fi

exec_sql "ld_dir('$PWD', '*.rdf.gz', '$GRAPH_URI');"

# isql may report SQL errors on stderr; treat any captured "Error" as fatal.
if grep -q Error "$err"; then
  cat "$err" >&2
  exit 1
fi

rm -f "$err"

# Start the parallel loaders and remember their PIDs so that each exit
# status can be checked ($? after a backgrounded command is always 0).
loader_pids=()

for ((proc_id = 1; proc_id <= MAXPROCS; proc_id++)); do
  run_isql exec="rdf_loader_run();" &
  loader_pids+=("$!")
done

loader_failed=0

for pid in "${loader_pids[@]}"; do
  wait "$pid" || loader_failed=1
done

if (( loader_failed )); then
  echo "rdf_loader_run() failed in at least one loader process." >&2
  exit 1
fi

run_isql exec="checkpoint;" || exit 1

date -u +"%b %d, %Y" > "$last_marker"

echo "RDF->VIRTUOSO (prefix:${DB_NAME}) is completed."

printf '%s' "Stopping virtuoso-t daemon: "

# -K shuts the server down.
run_isql -K

echo

126 changes: 126 additions & 0 deletions virtuoso_scripts/update_virtuoso_pdb.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#!/bin/bash
#
# Load the PDB/RDF files (*.rdf.gz under $RDF) into the Virtuoso graph
# http://rdf.wwpdb.org/pdb using ld_dir() / rdf_loader_run().
#
# Usage: run from the directory that contains ./scripts and
#        ./virtuoso_scripts (both are referenced by relative path).
#
# Variables expected from the sourced env files:
#   MAXPROCS, RDF, RDF_LINK,
#   VIRTUOSO_DB_PORT, VIRTUOSO_DB_USER, VIRTUOSO_DB_PASS
#
# On success the current UTC date is written to /tmp/pdb-virtuoso-last and
# the Virtuoso server is shut down.

source ./scripts/env.sh
source ./virtuoso_scripts/virtuoso_env.sh

# Fail early with a clear message instead of running destructive commands
# (rm -rf, cd) with empty paths.
: "${MAXPROCS:?MAXPROCS is not set}"
: "${RDF:?RDF is not set}"
: "${RDF_LINK:?RDF_LINK is not set}"
: "${VIRTUOSO_DB_PORT:?VIRTUOSO_DB_PORT is not set}"
: "${VIRTUOSO_DB_USER:?VIRTUOSO_DB_USER is not set}"
: "${VIRTUOSO_DB_PASS?VIRTUOSO_DB_PASS is not set}"

# Run isql against the configured Virtuoso instance.
# NOTE(review): the password is passed on the command line and is therefore
# visible in the process list.
run_isql() {
  isql "$VIRTUOSO_DB_PORT" "$VIRTUOSO_DB_USER" "$VIRTUOSO_DB_PASS" "$@"
}

# Echo a SQL command, execute it, and abort the whole script if isql fails.
# stderr of isql is captured in the file named by $err.
exec_sql() {
  local sql=$1

  echo "$sql"

  if ! run_isql exec="$sql" 2> "$err"; then
    cat "$err" >&2
    exit 1
  fi
}

# Use only a fraction (1/2.5, truncated) of the configured process count
# for the parallel loaders, but always at least one.
MAXPROCS=$(echo "scale=0; $MAXPROCS / 2.5" | bc)

if [[ -z "$MAXPROCS" || "$MAXPROCS" == 0 ]]; then
  MAXPROCS=1
fi

DB_NAME=pdb
last_marker=/tmp/${DB_NAME}-virtuoso-last

rm -f "$last_marker"

init=false
# Number of RDF files modified within the last 4 days.
change=$(find "$RDF" -name '*.rdf.gz' -mtime -4 | wc -l)

if ! command -v isql > /dev/null 2>&1; then
  echo "isql: command not found..."
  echo "Please install Virtuoso (https://virtuoso.openlinksw.com/)."
  exit 1
fi

./virtuoso_scripts/start_virtuoso.sh || exit 1

# Presumably gives the server time to finish starting up.
sleep 180

GRAPH_URI=http://rdf.wwpdb.org/$DB_NAME

graph_exist=$(./virtuoso_scripts/ask_graph_existance.sh "$GRAPH_URI") || exit 1

if [[ "$graph_exist" == 1 && "$init" == "false" ]]; then

  if (( change == 0 )); then
    echo "$DB_NAME" is update.
  fi

  # NOTE(review): an existing graph is never reloaded unless 'init' is set
  # to something other than "false", even when files changed. Confirm this
  # is intended (the exit may have been meant to sit inside the check above).
  exit 0

fi

echo
echo "Do you want to update Virtuoso DB ($GRAPH_URI)? (y [n]) "

read -r ans

case "$ans" in
  y* | Y*) ;;
  *)
    echo skipped.
    exit 1
    ;;
esac

run_isql exec="status();" || exit 1

# Rebuild a flat directory of symlinks to every RDF file; ld_dir() is
# pointed at this directory below.
rm -rf -- "${RDF_LINK:?}"
mkdir -p -- "$RDF_LINK" || exit 1

cd -- "$RDF_LINK" || exit 1

# The source path is resolved relative to the link directory, as in the
# original layout.
while IFS= read -r rdf_file; do
  ln -s -- "$rdf_file" .
done < <(find "../$RDF" -type f -iname "*.rdf.gz")

err=${DB_NAME}_err

if [[ "$graph_exist" == 1 ]]; then
  exec_sql "log_enable(3,1); SPARQL CLEAR GRAPH <$GRAPH_URI>;"
  exec_sql "log_enable(3,1); DELETE FROM rdf_quad WHERE g = iri_to_id ('$GRAPH_URI');"
fi

exec_sql "ld_dir('$PWD', '*.rdf.gz', '$GRAPH_URI');"

# isql may report SQL errors on stderr; treat any captured "Error" as fatal.
if grep -q Error "$err"; then
  cat "$err" >&2
  exit 1
fi

rm -f "$err"

# Start the parallel loaders and remember their PIDs so that each exit
# status can be checked ($? after a backgrounded command is always 0).
loader_pids=()

for ((proc_id = 1; proc_id <= MAXPROCS; proc_id++)); do
  run_isql exec="rdf_loader_run();" &
  loader_pids+=("$!")
done

loader_failed=0

for pid in "${loader_pids[@]}"; do
  wait "$pid" || loader_failed=1
done

if (( loader_failed )); then
  echo "rdf_loader_run() failed in at least one loader process." >&2
  exit 1
fi

run_isql exec="checkpoint;" || exit 1

date -u +"%b %d, %Y" > "$last_marker"

echo "RDF->VIRTUOSO (prefix:${DB_NAME}) is completed."

printf '%s' "Stopping virtuoso-t daemon: "

# -K shuts the server down.
run_isql -K

echo

126 changes: 126 additions & 0 deletions virtuoso_scripts/update_virtuoso_prd.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#!/bin/bash
#
# Load the PRD/RDF files (*.rdf.gz under $RDF_PRD) into the Virtuoso graph
# http://rdf.wwpdb.org/prd using ld_dir() / rdf_loader_run().
#
# Usage: run from the directory that contains ./scripts and
#        ./virtuoso_scripts (both are referenced by relative path).
#
# Variables expected from the sourced env files:
#   MAXPROCS, RDF_PRD, RDF_BIRD_LINK,
#   VIRTUOSO_DB_PORT, VIRTUOSO_DB_USER, VIRTUOSO_DB_PASS
#
# On success the current UTC date is written to /tmp/prd-virtuoso-last and
# the Virtuoso server is shut down.

source ./scripts/env.sh
source ./virtuoso_scripts/virtuoso_env.sh

# Fail early with a clear message instead of running destructive commands
# (rm -rf, cd) with empty paths.
: "${MAXPROCS:?MAXPROCS is not set}"
: "${RDF_PRD:?RDF_PRD is not set}"
: "${RDF_BIRD_LINK:?RDF_BIRD_LINK is not set}"
: "${VIRTUOSO_DB_PORT:?VIRTUOSO_DB_PORT is not set}"
: "${VIRTUOSO_DB_USER:?VIRTUOSO_DB_USER is not set}"
: "${VIRTUOSO_DB_PASS?VIRTUOSO_DB_PASS is not set}"

# Run isql against the configured Virtuoso instance.
# NOTE(review): the password is passed on the command line and is therefore
# visible in the process list.
run_isql() {
  isql "$VIRTUOSO_DB_PORT" "$VIRTUOSO_DB_USER" "$VIRTUOSO_DB_PASS" "$@"
}

# Echo a SQL command, execute it, and abort the whole script if isql fails.
# stderr of isql is captured in the file named by $err.
exec_sql() {
  local sql=$1

  echo "$sql"

  if ! run_isql exec="$sql" 2> "$err"; then
    cat "$err" >&2
    exit 1
  fi
}

# Use only a fraction (1/2.5, truncated) of the configured process count
# for the parallel loaders, but always at least one.
MAXPROCS=$(echo "scale=0; $MAXPROCS / 2.5" | bc)

if [[ -z "$MAXPROCS" || "$MAXPROCS" == 0 ]]; then
  MAXPROCS=1
fi

DB_NAME=prd
last_marker=/tmp/${DB_NAME}-virtuoso-last

rm -f "$last_marker"

init=false
# Number of RDF files modified within the last 4 days.
change=$(find "$RDF_PRD" -name '*.rdf.gz' -mtime -4 | wc -l)

if ! command -v isql > /dev/null 2>&1; then
  echo "isql: command not found..."
  echo "Please install Virtuoso (https://virtuoso.openlinksw.com/)."
  exit 1
fi

./virtuoso_scripts/start_virtuoso.sh || exit 1

# Presumably gives the server time to finish starting up.
sleep 180

GRAPH_URI=http://rdf.wwpdb.org/$DB_NAME

graph_exist=$(./virtuoso_scripts/ask_graph_existance.sh "$GRAPH_URI") || exit 1

if [[ "$graph_exist" == 1 && "$init" == "false" ]]; then

  if (( change == 0 )); then
    echo "$DB_NAME" is update.
  fi

  # NOTE(review): an existing graph is never reloaded unless 'init' is set
  # to something other than "false", even when files changed. Confirm this
  # is intended (the exit may have been meant to sit inside the check above).
  exit 0

fi

echo
echo "Do you want to update Virtuoso DB ($GRAPH_URI)? (y [n]) "

read -r ans

case "$ans" in
  y* | Y*) ;;
  *)
    echo skipped.
    exit 1
    ;;
esac

run_isql exec="status();" || exit 1

# Rebuild a flat directory of symlinks to every RDF file; ld_dir() is
# pointed at this directory below.
rm -rf -- "${RDF_BIRD_LINK:?}"
mkdir -p -- "$RDF_BIRD_LINK" || exit 1

cd -- "$RDF_BIRD_LINK" || exit 1

# The source path is resolved relative to the link directory, as in the
# original layout.
while IFS= read -r rdf_file; do
  ln -s -- "$rdf_file" .
done < <(find "../$RDF_PRD" -type f -iname "*.rdf.gz")

err=${DB_NAME}_err

if [[ "$graph_exist" == 1 ]]; then
  exec_sql "log_enable(3,1); SPARQL CLEAR GRAPH <$GRAPH_URI>;"
  exec_sql "log_enable(3,1); DELETE FROM rdf_quad WHERE g = iri_to_id ('$GRAPH_URI');"
fi

exec_sql "ld_dir('$PWD', '*.rdf.gz', '$GRAPH_URI');"

# isql may report SQL errors on stderr; treat any captured "Error" as fatal.
if grep -q Error "$err"; then
  cat "$err" >&2
  exit 1
fi

rm -f "$err"

# Start the parallel loaders and remember their PIDs so that each exit
# status can be checked ($? after a backgrounded command is always 0).
loader_pids=()

for ((proc_id = 1; proc_id <= MAXPROCS; proc_id++)); do
  run_isql exec="rdf_loader_run();" &
  loader_pids+=("$!")
done

loader_failed=0

for pid in "${loader_pids[@]}"; do
  wait "$pid" || loader_failed=1
done

if (( loader_failed )); then
  echo "rdf_loader_run() failed in at least one loader process." >&2
  exit 1
fi

run_isql exec="checkpoint;" || exit 1

date -u +"%b %d, %Y" > "$last_marker"

echo "RDF->VIRTUOSO (prefix:${DB_NAME}) is completed."

printf '%s' "Stopping virtuoso-t daemon: "

# -K shuts the server down.
run_isql -K

echo

Loading

0 comments on commit ec1670a

Please sign in to comment.