From ec1670a24d6002bd34cefdaee09fb21b1c0c7241 Mon Sep 17 00:00:00 2001 From: Masashi Yokochi Date: Fri, 24 Sep 2021 20:44:23 +0900 Subject: [PATCH] Update Virtuoso data upload scripts for PDB/RDF, chem_comp/RDF, PRD/RDF, and VRPT/RDF(-alt) --- README.md | 1 + virtuoso_scripts/update_virtuoso_cc.sh | 126 ++++++++++++++++++ virtuoso_scripts/update_virtuoso_pdb.sh | 126 ++++++++++++++++++ virtuoso_scripts/update_virtuoso_prd.sh | 126 ++++++++++++++++++ ...te_virtuoso.sh => update_virtuoso_vrpt.sh} | 8 +- ...oso_alt.sh => update_virtuoso_vrpt_alt.sh} | 8 +- virtuoso_scripts/virtuoso_env.sh | 3 + 7 files changed, 390 insertions(+), 8 deletions(-) create mode 100755 virtuoso_scripts/update_virtuoso_cc.sh create mode 100755 virtuoso_scripts/update_virtuoso_pdb.sh create mode 100755 virtuoso_scripts/update_virtuoso_prd.sh rename virtuoso_scripts/{update_virtuoso.sh => update_virtuoso_vrpt.sh} (94%) rename virtuoso_scripts/{update_virtuoso_alt.sh => update_virtuoso_vrpt_alt.sh} (93%) diff --git a/README.md b/README.md index 7db1b97c..22175067 100644 --- a/README.md +++ b/README.md @@ -179,6 +179,7 @@ File path | Document - Add resources for PRD/RDF. - Fix stylesheets to remove category element that has no children. (PDB ID: 7lqs) - Fix XSD data type of '_pdbx_em_volume_estimate_marker.enclosed_volume' and '_pdbx_em_density_distribution_marker.map_value' (xsd:double). + - Update Virtuoso data upload scripts for PDB/RDF, chem_comp/RDF, PRD/RDF, and VRPT/RDF(-alt). - **Sep 17, 2021**: Release v4.2.0 - Integration of [SIFTS](https://www.ebi.ac.uk/pdbe/docs/sifts/) into PDB/RDF that includes HTML links to GO, InterPro, Pfam, CATH domain, SCOP/SCOP2/SCOP2B, and Ensembl from '_pdbx_sifts_xref_db_segments' category. 
diff --git a/virtuoso_scripts/update_virtuoso_cc.sh b/virtuoso_scripts/update_virtuoso_cc.sh
new file mode 100755
index 00000000..0f718601
--- /dev/null
+++ b/virtuoso_scripts/update_virtuoso_cc.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+#
+# Upload chem_comp/RDF (*.rdf.gz found under $RDF_CC) into the Virtuoso graph
+# http://rdf.wwpdb.org/cc with the bulk loader (ld_dir + rdf_loader_run).
+#
+# Start it from the directory that contains ./scripts and ./virtuoso_scripts:
+# both env files below are sourced by relative path. They are expected to
+# define MAXPROCS, RDF_CC, RDF_CC_LINK and the VIRTUOSO_DB_* settings.
+
+source ./scripts/env.sh
+source ./virtuoso_scripts/virtuoso_env.sh
+
+# Use MAXPROCS / 2.5 loader workers (truncated), but never fewer than one.
+MAXPROCS=$(echo "scale=0; $MAXPROCS / 2.5" | bc)
+
+if [ "${MAXPROCS:-0}" = 0 ] ; then
+  MAXPROCS=1
+fi
+
+DB_NAME=cc
+
+rm -f /tmp/cc-virtuoso-last
+
+# Set init=true to force a reload of a graph that already exists.
+init=false
+change=$(find "$RDF_CC" -name '*.rdf.gz' -mtime -4 | wc -l)
+
+# Print isql's captured stderr and abort. Must be called from the main shell:
+# an "exit" inside a "( ... )" subshell would not stop this script.
+abort_with_log() {
+  cat "$err"
+  exit 1
+}
+
+# isql with the connection settings taken from the VIRTUOSO_DB_* variables.
+run_isql() {
+  isql "$VIRTUOSO_DB_PORT" "$VIRTUOSO_DB_USER" "$VIRTUOSO_DB_PASS" "$@"
+}
+
+# Echo one SQL command, run it, and abort with isql's stderr if isql fails.
+exec_sql() {
+  echo "$1"
+  run_isql exec="$1" 2> "$err" || abort_with_log
+}
+
+if ! command -v isql > /dev/null 2>&1 ; then
+
+  echo "isql: command not found..."
+  echo "Please install Virtuoso (https://virtuoso.openlinksw.com/)."
+  exit 1
+
+fi
+
+./virtuoso_scripts/start_virtuoso.sh || exit 1
+
+# Fixed pause after start-up, presumably so the server is ready for queries.
+sleep 180
+
+GRAPH_URI=http://rdf.wwpdb.org/$DB_NAME
+
+graph_exist=$(./virtuoso_scripts/ask_graph_existance.sh "$GRAPH_URI") || exit 1
+
+# An existing graph is left untouched unless init=true; the recent-change
+# count only decides whether the notice below is printed.
+if [ "$graph_exist" = 1 ] && [ "$init" = "false" ] ; then
+
+  if [ "$change" -eq 0 ] ; then
+    echo $DB_NAME is update.
+  fi
+
+  exit 0
+
+fi
+
+echo
+echo "Do you want to update Virtuoso DB ($GRAPH_URI)? (y [n]) "
+
+read -r ans
+
+case $ans in
+  y*|Y*) ;;
+  *) echo skipped.
+    exit 1;;
+esac
+
+run_isql exec="status();" || exit 1
+
+# ${VAR:?} aborts rather than running rm -rf / cd with an empty path.
+rm -rf -- "${RDF_CC_LINK:?}"
+mkdir -p "$RDF_CC_LINK"
+
+cd "$RDF_CC_LINK" || exit 1
+
+# Symlink every RDF file into this flat directory, which is handed to ld_dir.
+# The "../" prefix assumes $RDF_CC is a path relative to the parent directory.
+while IFS= read -r rdf_file ; do
+
+  ln -s "$rdf_file" .
+
+done < <(find "../$RDF_CC" -type f -iname "*.rdf.gz")
+
+err=$DB_NAME"_err"
+
+if [ "$graph_exist" = 1 ] ; then
+
+  exec_sql "log_enable(3,1); SPARQL CLEAR GRAPH <$GRAPH_URI>;"
+
+  exec_sql "log_enable(3,1); DELETE FROM rdf_quad WHERE g = iri_to_id ('$GRAPH_URI');"
+
+fi
+
+exec_sql "ld_dir('$PWD', '*.rdf.gz', '$GRAPH_URI');"
+
+# Also scan isql's stderr for an error report; abort only when one is found.
+if grep -q Error "$err" ; then
+  abort_with_log
+fi
+
+rm -f "$err"
+
+# Start the loader workers in parallel and keep each PID: "$?" after "&" only
+# reports that the job was launched, so each worker is waited for by PID.
+loader_pids=()
+for (( proc_id = 1; proc_id <= MAXPROCS; proc_id++ )) ; do
+
+  run_isql exec="rdf_loader_run();" &
+  loader_pids+=("$!")
+
+done
+
+loader_failed=0
+for loader_pid in "${loader_pids[@]}" ; do
+  wait "$loader_pid" || loader_failed=1
+done
+
+if [ "$loader_failed" != 0 ] ; then
+  exit 1
+fi
+
+run_isql exec="checkpoint;" || exit 1
+
+date -u +"%b %d, %Y" > /tmp/cc-virtuoso-last
+
+echo "RDF->VIRTUOSO (prefix:"$DB_NAME") is completed."
+
+echo -n $"Stopping virtuoso-t daemon: "
+
+run_isql -K
+
+echo
+
diff --git a/virtuoso_scripts/update_virtuoso_pdb.sh b/virtuoso_scripts/update_virtuoso_pdb.sh
new file mode 100755
index 00000000..771cb397
--- /dev/null
+++ b/virtuoso_scripts/update_virtuoso_pdb.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+#
+# Upload PDB/RDF (*.rdf.gz found under $RDF) into the Virtuoso graph
+# http://rdf.wwpdb.org/pdb with the bulk loader (ld_dir + rdf_loader_run).
+#
+# Start it from the directory that contains ./scripts and ./virtuoso_scripts:
+# both env files below are sourced by relative path. They are expected to
+# define MAXPROCS, RDF, RDF_LINK and the VIRTUOSO_DB_* settings.
+
+source ./scripts/env.sh
+source ./virtuoso_scripts/virtuoso_env.sh
+
+# Use MAXPROCS / 2.5 loader workers (truncated), but never fewer than one.
+MAXPROCS=$(echo "scale=0; $MAXPROCS / 2.5" | bc)
+
+if [ "${MAXPROCS:-0}" = 0 ] ; then
+  MAXPROCS=1
+fi
+
+DB_NAME=pdb
+
+rm -f /tmp/pdb-virtuoso-last
+
+# Set init=true to force a reload of a graph that already exists.
+init=false
+change=$(find "$RDF" -name '*.rdf.gz' -mtime -4 | wc -l)
+
+# Print isql's captured stderr and abort. Must be called from the main shell:
+# an "exit" inside a "( ... )" subshell would not stop this script.
+abort_with_log() {
+  cat "$err"
+  exit 1
+}
+
+# isql with the connection settings taken from the VIRTUOSO_DB_* variables.
+run_isql() {
+  isql "$VIRTUOSO_DB_PORT" "$VIRTUOSO_DB_USER" "$VIRTUOSO_DB_PASS" "$@"
+}
+
+# Echo one SQL command, run it, and abort with isql's stderr if isql fails.
+exec_sql() {
+  echo "$1"
+  run_isql exec="$1" 2> "$err" || abort_with_log
+}
+
+if ! command -v isql > /dev/null 2>&1 ; then
+
+  echo "isql: command not found..."
+  echo "Please install Virtuoso (https://virtuoso.openlinksw.com/)."
+  exit 1
+
+fi
+
+./virtuoso_scripts/start_virtuoso.sh || exit 1
+
+# Fixed pause after start-up, presumably so the server is ready for queries.
+sleep 180
+
+GRAPH_URI=http://rdf.wwpdb.org/$DB_NAME
+
+graph_exist=$(./virtuoso_scripts/ask_graph_existance.sh "$GRAPH_URI") || exit 1
+
+# An existing graph is left untouched unless init=true; the recent-change
+# count only decides whether the notice below is printed.
+if [ "$graph_exist" = 1 ] && [ "$init" = "false" ] ; then
+
+  if [ "$change" -eq 0 ] ; then
+    echo $DB_NAME is update.
+  fi
+
+  exit 0
+
+fi
+
+echo
+echo "Do you want to update Virtuoso DB ($GRAPH_URI)? (y [n]) "
+
+read -r ans
+
+case $ans in
+  y*|Y*) ;;
+  *) echo skipped.
+    exit 1;;
+esac
+
+run_isql exec="status();" || exit 1
+
+# ${VAR:?} aborts rather than running rm -rf / cd with an empty path.
+rm -rf -- "${RDF_LINK:?}"
+mkdir -p "$RDF_LINK"
+
+cd "$RDF_LINK" || exit 1
+
+# Symlink every RDF file into this flat directory, which is handed to ld_dir.
+# The "../" prefix assumes $RDF is a path relative to the parent directory.
+while IFS= read -r rdf_file ; do
+
+  ln -s "$rdf_file" .
+
+done < <(find "../$RDF" -type f -iname "*.rdf.gz")
+
+err=$DB_NAME"_err"
+
+if [ "$graph_exist" = 1 ] ; then
+
+  exec_sql "log_enable(3,1); SPARQL CLEAR GRAPH <$GRAPH_URI>;"
+
+  exec_sql "log_enable(3,1); DELETE FROM rdf_quad WHERE g = iri_to_id ('$GRAPH_URI');"
+
+fi
+
+exec_sql "ld_dir('$PWD', '*.rdf.gz', '$GRAPH_URI');"
+
+# Also scan isql's stderr for an error report; abort only when one is found.
+if grep -q Error "$err" ; then
+  abort_with_log
+fi
+
+rm -f "$err"
+
+# Start the loader workers in parallel and keep each PID: "$?" after "&" only
+# reports that the job was launched, so each worker is waited for by PID.
+loader_pids=()
+for (( proc_id = 1; proc_id <= MAXPROCS; proc_id++ )) ; do
+
+  run_isql exec="rdf_loader_run();" &
+  loader_pids+=("$!")
+
+done
+
+loader_failed=0
+for loader_pid in "${loader_pids[@]}" ; do
+  wait "$loader_pid" || loader_failed=1
+done
+
+if [ "$loader_failed" != 0 ] ; then
+  exit 1
+fi
+
+run_isql exec="checkpoint;" || exit 1
+
+date -u +"%b %d, %Y" > /tmp/pdb-virtuoso-last
+
+echo "RDF->VIRTUOSO (prefix:"$DB_NAME") is completed."
+
+echo -n $"Stopping virtuoso-t daemon: "
+
+run_isql -K
+
+echo
+
diff --git a/virtuoso_scripts/update_virtuoso_prd.sh b/virtuoso_scripts/update_virtuoso_prd.sh
new file mode 100755
index 00000000..739e2d18
--- /dev/null
+++ b/virtuoso_scripts/update_virtuoso_prd.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+#
+# Upload PRD/RDF (*.rdf.gz found under $RDF_PRD) into the Virtuoso graph
+# http://rdf.wwpdb.org/prd with the bulk loader (ld_dir + rdf_loader_run).
+#
+# Start it from the directory that contains ./scripts and ./virtuoso_scripts:
+# both env files below are sourced by relative path. They are expected to
+# define MAXPROCS, RDF_PRD, RDF_BIRD_LINK and the VIRTUOSO_DB_* settings.
+
+source ./scripts/env.sh
+source ./virtuoso_scripts/virtuoso_env.sh
+
+# Use MAXPROCS / 2.5 loader workers (truncated), but never fewer than one.
+MAXPROCS=$(echo "scale=0; $MAXPROCS / 2.5" | bc)
+
+if [ "${MAXPROCS:-0}" = 0 ] ; then
+  MAXPROCS=1
+fi
+
+DB_NAME=prd
+
+rm -f /tmp/prd-virtuoso-last
+
+# Set init=true to force a reload of a graph that already exists.
+init=false
+change=$(find "$RDF_PRD" -name '*.rdf.gz' -mtime -4 | wc -l)
+
+# Print isql's captured stderr and abort. Must be called from the main shell:
+# an "exit" inside a "( ... )" subshell would not stop this script.
+abort_with_log() {
+  cat "$err"
+  exit 1
+}
+
+# isql with the connection settings taken from the VIRTUOSO_DB_* variables.
+run_isql() {
+  isql "$VIRTUOSO_DB_PORT" "$VIRTUOSO_DB_USER" "$VIRTUOSO_DB_PASS" "$@"
+}
+
+# Echo one SQL command, run it, and abort with isql's stderr if isql fails.
+exec_sql() {
+  echo "$1"
+  run_isql exec="$1" 2> "$err" || abort_with_log
+}
+
+if ! command -v isql > /dev/null 2>&1 ; then
+
+  echo "isql: command not found..."
+  echo "Please install Virtuoso (https://virtuoso.openlinksw.com/)."
+  exit 1
+
+fi
+
+./virtuoso_scripts/start_virtuoso.sh || exit 1
+
+# Fixed pause after start-up, presumably so the server is ready for queries.
+sleep 180
+
+GRAPH_URI=http://rdf.wwpdb.org/$DB_NAME
+
+graph_exist=$(./virtuoso_scripts/ask_graph_existance.sh "$GRAPH_URI") || exit 1
+
+# An existing graph is left untouched unless init=true; the recent-change
+# count only decides whether the notice below is printed.
+if [ "$graph_exist" = 1 ] && [ "$init" = "false" ] ; then
+
+  if [ "$change" -eq 0 ] ; then
+    echo $DB_NAME is update.
+  fi
+
+  exit 0
+
+fi
+
+echo
+echo "Do you want to update Virtuoso DB ($GRAPH_URI)? (y [n]) "
+
+read -r ans
+
+case $ans in
+  y*|Y*) ;;
+  *) echo skipped.
+    exit 1;;
+esac
+
+run_isql exec="status();" || exit 1
+
+# ${VAR:?} aborts rather than running rm -rf / cd with an empty path.
+rm -rf -- "${RDF_BIRD_LINK:?}"
+mkdir -p "$RDF_BIRD_LINK"
+
+cd "$RDF_BIRD_LINK" || exit 1
+
+# Symlink every RDF file into this flat directory, which is handed to ld_dir.
+# The "../" prefix assumes $RDF_PRD is a path relative to the parent directory.
+while IFS= read -r rdf_file ; do
+
+  ln -s "$rdf_file" .
+
+done < <(find "../$RDF_PRD" -type f -iname "*.rdf.gz")
+
+err=$DB_NAME"_err"
+
+if [ "$graph_exist" = 1 ] ; then
+
+  exec_sql "log_enable(3,1); SPARQL CLEAR GRAPH <$GRAPH_URI>;"
+
+  exec_sql "log_enable(3,1); DELETE FROM rdf_quad WHERE g = iri_to_id ('$GRAPH_URI');"
+
+fi
+
+exec_sql "ld_dir('$PWD', '*.rdf.gz', '$GRAPH_URI');"
+
+# Also scan isql's stderr for an error report; abort only when one is found.
+if grep -q Error "$err" ; then
+  abort_with_log
+fi
+
+rm -f "$err"
+
+# Start the loader workers in parallel and keep each PID: "$?" after "&" only
+# reports that the job was launched, so each worker is waited for by PID.
+loader_pids=()
+for (( proc_id = 1; proc_id <= MAXPROCS; proc_id++ )) ; do
+
+  run_isql exec="rdf_loader_run();" &
+  loader_pids+=("$!")
+
+done
+
+loader_failed=0
+for loader_pid in "${loader_pids[@]}" ; do
+  wait "$loader_pid" || loader_failed=1
+done
+
+if [ "$loader_failed" != 0 ] ; then
+  exit 1
+fi
+
+run_isql exec="checkpoint;" || exit 1
+
+date -u +"%b %d, %Y" > /tmp/prd-virtuoso-last
+
+echo "RDF->VIRTUOSO (prefix:"$DB_NAME") is completed."
+
+echo -n $"Stopping virtuoso-t daemon: "
+
+run_isql -K
+
+echo
+
diff --git a/virtuoso_scripts/update_virtuoso.sh b/virtuoso_scripts/update_virtuoso_vrpt.sh similarity index 94% rename from virtuoso_scripts/update_virtuoso.sh rename to virtuoso_scripts/update_virtuoso_vrpt.sh index bb5288da..8fc886b0 100755 --- a/virtuoso_scripts/update_virtuoso.sh +++ b/virtuoso_scripts/update_virtuoso_vrpt.sh @@ -9,9 +9,9 @@ if [ $MAXPROCS = 0 ] ; then MAXPROCS=1 fi -DB_NAME=pdb-validation +DB_NAME=vrpt -rm -f /tmp/pdbxv-virtuoso-last +rm -f /tmp/vrpt-virtuoso-last init=false change=`find $RDF_VALID -name '*.rdf.gz' -mtime -4 | wc -l` @@ -30,7 +30,7 @@ fi sleep 180 -GRAPH_URI=https://rdf.wwpdb.org/$DB_NAME +GRAPH_URI=http://rdf.wwpdb.org/$DB_NAME graph_exist=`./virtuoso_scripts/ask_graph_existance.sh $GRAPH_URI` || exit 1 @@ -114,7 +114,7 @@ wait isql $VIRTUOSO_DB_PORT $VIRTUOSO_DB_USER $VIRTUOSO_DB_PASS exec="checkpoint;" || exit 1 -date -u +"%b %d, %Y" > /tmp/pdbxv-virtuoso-last +date -u +"%b %d, %Y" > /tmp/vrpt-virtuoso-last echo "RDF->VIRTUOSO (prefix:"$DB_NAME") is completed." 
diff --git a/virtuoso_scripts/update_virtuoso_alt.sh b/virtuoso_scripts/update_virtuoso_vrpt_alt.sh similarity index 93% rename from virtuoso_scripts/update_virtuoso_alt.sh rename to virtuoso_scripts/update_virtuoso_vrpt_alt.sh index db937858..a93fcd62 100755 --- a/virtuoso_scripts/update_virtuoso_alt.sh +++ b/virtuoso_scripts/update_virtuoso_vrpt_alt.sh @@ -9,9 +9,9 @@ if [ $MAXPROCS = 0 ] ; then MAXPROCS=1 fi -DB_NAME=pdb-validation-alt +DB_NAME=vrpt -rm -f /tmp/pdbxv-alt-virtuoso-last +rm -f /tmp/vrpt-alt-virtuoso-last init=false change=`find $RDF_VALID_ALT -name '*.rdf.gz' -mtime -4 | wc -l` @@ -30,7 +30,7 @@ fi sleep 180 -GRAPH_URI=https://rdf.wwpdb.org/$DB_NAME +GRAPH_URI=http://rdf.wwpdb.org/$DB_NAME graph_exist=`./virtuoso_scripts/ask_graph_existance.sh $GRAPH_URI` || exit 1 @@ -114,7 +114,7 @@ wait isql $VIRTUOSO_DB_PORT $VIRTUOSO_DB_USER $VIRTUOSO_DB_PASS exec="checkpoint;" || exit 1 -date -u +"%b %d, %Y" > /tmp/pdbxv-alt-virtuoso-last +date -u +"%b %d, %Y" > /tmp/vrpt-alt-virtuoso-last echo "RDF->VIRTUOSO (prefix:"$DB_NAME") is completed." diff --git a/virtuoso_scripts/virtuoso_env.sh b/virtuoso_scripts/virtuoso_env.sh index 593215c7..9193efac 100755 --- a/virtuoso_scripts/virtuoso_env.sh +++ b/virtuoso_scripts/virtuoso_env.sh @@ -4,5 +4,8 @@ VIRTUOSO_DB_PORT=1111 VIRTUOSO_DB_USER=dba VIRTUOSO_DB_PASS=dba +RDF_LINK=RDF-link +RDF_CC_LINK=RDF-chem_comp-link +RDF_BIRD_LINK=RDF-bird-link RDF_VALID_LINK=RDF-validation-link