diff --git a/README.md b/README.md
index 7de0aee..baac250 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,19 @@
-# 2022-mtx-not-in-mgx-pairs
+# Identifying sequences that are in a metatranscriptome but not in a metagenome
+
+This repository curates a set of paired metagenomes and metatranscriptomes and provides a pipeline to rapidly identify the fraction of sequences in a metatranscriptome that are not in a metagenome.
+The pipeline is shaped around a metadata file, `inputs/metadata-paired-mgx-mtx.tsv`, that contains a sample name (`sample_name`), metagenome SRA run accession (`mgx_run_accession`; `SRR*`, `ERR*`, `DRR*`), metatranscriptome SRA run accession (`mtx_run_accession`), the corresponding study accessions (`mgx_study_accession`, `mtx_study_accession`), and a sample type (`sample_type`).
+Using the run accessions, it downloads the sequencing data from the SRA and generates a [FracMinHash sketch](https://www.biorxiv.org/content/10.1101/2022.01.11.475838v2.abstract) of each run.
+Then, it uses the paired information encoded in the metadata table to subtract the metagenome sketch from the metatranscriptome sketch.
+This produces an estimate of the fraction of metatranscriptome sequences not found in the paired metagenome.
+These estimates are also clustered by `sample_type` to generate biome-specific estimates.
+The pipeline also analyzes the fraction of metatranscriptome-specific sequences that are shared between samples to discover what fraction of sequences we are systematically missing within and across biomes.
+
+Some metagenome and metatranscriptome pairs are true pairs that were extracted from the same sample, while others are from separate samples taken from the same location at the same time.
+
+## Getting started with this repository
+
+TBD
+
+## Next steps
+
+TBD
diff --git a/inputs/metadata-paired-mgx-mtx.tsv b/inputs/metadata-paired-mgx-mtx.tsv
new file mode 100644
index 0000000..1a633a4
--- /dev/null
+++ b/inputs/metadata-paired-mgx-mtx.tsv
@@ -0,0 +1,1281 @@
+sample_name mtx_study_accession mtx_run_accession mgx_study_accession mgx_run_accession sample_type
+C_2_1 PRJNA406858 SRR6032600 PRJNA406858 SRR6032602 activated_sludge
+B_2_1 PRJNA406858 SRR6032604 PRJNA406858 SRR6032601 activated_sludge
+E_2_1 PRJNA406858 SRR6032605 PRJNA406858 SRR6032603 activated_sludge
+rumen_microbiome_of_beef_cattle_101 PRJNA448333 SRR8416057 PRJNA448333 SRR8397906 cattle_rumen
+rumen_microbiome_of_beef_cattle_103 PRJNA448333 SRR8416058 PRJNA448333 SRR8404214 cattle_rumen
+rumen_microbiome_of_beef_cattle_104 PRJNA448333 SRR8416055 PRJNA448333 SRR8397905 cattle_rumen
+rumen_microbiome_of_beef_cattle_105 PRJNA448333 SRR8416056 PRJNA448333 SRR8397904 cattle_rumen
+rumen_microbiome_of_beef_cattle_106 PRJNA448333 SRR8416061 PRJNA448333 SRR8397903 cattle_rumen
+rumen_microbiome_of_beef_cattle_107 PRJNA448333 SRR8416062 PRJNA448333 SRR8397910 cattle_rumen
+rumen_microbiome_of_beef_cattle_112 PRJNA448333 SRR8416059 PRJNA448333 SRR8397909 cattle_rumen
+rumen_microbiome_of_beef_cattle_201 PRJNA448333 SRR8416060 PRJNA448333 SRR8397908 cattle_rumen
+rumen_microbiome_of_beef_cattle_202 PRJNA448333 SRR8416064 PRJNA448333 SRR8397907 cattle_rumen
+rumen_microbiome_of_beef_cattle_203 PRJNA448333 SRR8416065 PRJNA448333 SRR8397902 cattle_rumen
+rumen_microbiome_of_beef_cattle_205 PRJNA448333 SRR8416069 PRJNA448333 SRR8397901 cattle_rumen
+rumen_microbiome_of_beef_cattle_206 PRJNA448333 SRR8416070 PRJNA448333 SRR8397884 cattle_rumen
+rumen_microbiome_of_beef_cattle_207 PRJNA448333 SRR8416067 PRJNA448333 SRR8397883 cattle_rumen
+rumen_microbiome_of_beef_cattle_208 PRJNA448333 SRR8416068 PRJNA448333 SRR8397886 cattle_rumen
+rumen_microbiome_of_beef_cattle_209 PRJNA448333 SRR8416073 PRJNA448333 SRR8397885 cattle_rumen
+rumen_microbiome_of_beef_cattle_210 PRJNA448333 SRR8416074 PRJNA448333 SRR8397888 cattle_rumen
+rumen_microbiome_of_beef_cattle_301 PRJNA448333 SRR8416071 PRJNA448333 SRR8397887 cattle_rumen
+rumen_microbiome_of_beef_cattle_302 PRJNA448333 SRR8416072 PRJNA448333 SRR8397890 cattle_rumen
+rumen_microbiome_of_beef_cattle_303 PRJNA448333 SRR8416075 PRJNA448333 SRR8397889 cattle_rumen
+rumen_microbiome_of_beef_cattle_304 PRJNA448333 SRR8416076 PRJNA448333 SRR8397882 cattle_rumen
+rumen_microbiome_of_beef_cattle_305 PRJNA448333 SRR8416092 PRJNA448333 SRR8397881 cattle_rumen
+rumen_microbiome_of_beef_cattle_307 PRJNA448333 SRR8416091 PRJNA448333 SRR8397917 cattle_rumen
+rumen_microbiome_of_beef_cattle_308 PRJNA448333 SRR8416090 PRJNA448333 SRR8397918 cattle_rumen
+rumen_microbiome_of_beef_cattle_401 PRJNA448333 SRR8416089 PRJNA448333 SRR8397919 cattle_rumen
+rumen_microbiome_of_beef_cattle_402 PRJNA448333 SRR8416096 PRJNA448333 SRR8397920 cattle_rumen
+rumen_microbiome_of_beef_cattle_403 PRJNA448333 SRR8416095 PRJNA448333 SRR8397913 cattle_rumen
+rumen_microbiome_of_beef_cattle_404 PRJNA448333 SRR8416094 PRJNA448333 SRR8397914 cattle_rumen
+rumen_microbiome_of_beef_cattle_407 PRJNA448333 SRR8416093 PRJNA448333 SRR8397915 cattle_rumen
+rumen_microbiome_of_beef_cattle_501 PRJNA448333 SRR8416088 PRJNA448333 SRR8397916 cattle_rumen
+rumen_microbiome_of_beef_cattle_503 PRJNA448333 SRR8416087 PRJNA448333 SRR8397911 cattle_rumen
+rumen_microbiome_of_beef_cattle_505 PRJNA448333 SRR8416102 PRJNA448333 SRR8397912 cattle_rumen
+rumen_microbiome_of_beef_cattle_506 PRJNA448333 SRR8416083 PRJNA448333 SRR8397894 cattle_rumen
+rumen_microbiome_of_beef_cattle_601 PRJNA448333 SRR8416063 PRJNA448333 SRR8397893 cattle_rumen
+rumen_microbiome_of_beef_cattle_604 PRJNA448333 SRR8416066 PRJNA448333 SRR8397892 cattle_rumen
+rumen_microbiome_of_beef_cattle_608 PRJNA448333 SRR8416097 PRJNA448333 SRR8397891 cattle_rumen
+rumen_microbiome_of_beef_cattle_611 PRJNA448333 SRR8416098 PRJNA448333 SRR8397898 cattle_rumen
+rumen_microbiome_of_beef_cattle_612 PRJNA448333 SRR8416099 PRJNA448333 SRR8397897 cattle_rumen
+rumen_microbiome_of_beef_cattle_701 PRJNA448333 SRR8416100 PRJNA448333 SRR8397896 cattle_rumen
+rumen_microbiome_of_beef_cattle_702 PRJNA448333 SRR8416077 PRJNA448333 SRR8397895 cattle_rumen
+rumen_microbiome_of_beef_cattle_703 PRJNA448333 SRR8416078 PRJNA448333 SRR8397900 cattle_rumen
+rumen_microbiome_of_beef_cattle_704 PRJNA448333 SRR8416082 PRJNA448333 SRR8397899 cattle_rumen
+rumen_microbiome_of_beef_cattle_705 PRJNA448333 SRR8416081 PRJNA448333 SRR8397926 cattle_rumen
+rumen_microbiome_of_beef_cattle_706 PRJNA448333 SRR8416086 PRJNA448333 SRR8397927 cattle_rumen
+rumen_microbiome_of_beef_cattle_707 PRJNA448333 SRR8416084 PRJNA448333 SRR8397924 cattle_rumen
+rumen_microbiome_of_beef_cattle_708 PRJNA448333 SRR8416101 PRJNA448333 SRR8397925 cattle_rumen
+rumen_microbiome_of_beef_cattle_709 PRJNA448333 SRR8416085 PRJNA448333 SRR8397922 cattle_rumen
+rumen_microbiome_of_beef_cattle_710 PRJNA448333 SRR8416080 PRJNA448333 SRR8397923 cattle_rumen
+rumen_microbiome_of_beef_cattle_712 PRJNA448333 SRR8416079 PRJNA448333 SRR8397921 cattle_rumen
+HiTCE_2d PRJNA344005 SRR4308224 PRJNA344005 SRR4308227 groundwater
+HiTCEB12_2d PRJNA344005 SRR4308225 PRJNA344005 SRR4308226 groundwater
+CS-Sed16-2cmA PRJNA453733 SRR7083928 PRJNA453733 SRR7083931 lake
+amazon_plume_2_0um_from_station_10 PRJNA237345 SRR1193190 PRJNA237344 SRR1205250 river
+amazon_plume_2_0um_from_station_2 PRJNA237345 SRR1193177 PRJNA237344 SRR1182511 river
+amazon_plume_2_0um_from_station_23 PRJNA237345 SRR1193237 PRJNA237344 SRR1202089 river
+amazon_plume_2_0um_from_station_27 PRJNA237345 SRR1193629 PRJNA237344 SRR1183643 river
+amazon_plume_2_0um_from_station_3 PRJNA237345 SRR1193226 PRJNA237344 SRR1199272 river
+amazon_plume_0_2_2_0um_from_station_10 PRJNA237345 SRR1186930 PRJNA237344 SRR1199271 river
+amazon_plume_0_2_2_0um_from_station_2 PRJNA237345 SRR1193205 PRJNA237344 SRR1182512 river
+amazon_plume_0_2_2_0um_from_station_23 PRJNA237345 SRR1193632 PRJNA237344 SRR1186214 river
+amazon_plume_0_2_2_0um_from_station_25 PRJNA237345 SRR1204579 PRJNA237344 SRR1202090 river
+amazon_plume_0_2_2_0um_from_station_27 PRJNA237345 SRR1193627 PRJNA237344 SRR1183650 river
+amazon_plume_0_2_2_0um_from_station_3 PRJNA237345 SRR1193215 PRJNA237344 SRR1185413 river
+amazon_river_0_2_2_0um_from_belem PRJNA237345 SRR1781804 PRJNA237344 SRR1790489 river
+amazon_river_0_2_2_0um_from_macapa_north_channel PRJNA237345 SRR1785209 PRJNA237344 SRR1786279 river
+amazon_river_0_2_2_0um_from_macapa_south_channel PRJNA237345 SRR1784299 PRJNA237344 SRR1787940 river
+amazon_river_0_2_2_0um_from_obidos PRJNA237345 SRR1781945 PRJNA237344 SRR1790676 river
+amazon_river_0_2_2_0um_from_tapajos_depth PRJNA237345 SRR1779221 PRJNA237344 SRR1792674 river
+amazon_river_0_2_2_0um_from_tapajos_surface PRJNA237345 SRR1777513 PRJNA237344 SRR1796116 river
+amazon_river_2_0_297um_from_belem PRJNA237345 SRR1781811 PRJNA237344 SRR1790644 river
+amazon_river_2_0_297um_from_macapa_north_channel PRJNA237345 SRR1785350 PRJNA237344 SRR1786281 river
+amazon_river_2_0_297um_from_macapa_south_channel PRJNA237345 SRR1784304 PRJNA237344 SRR1787943 river
+amazon_river_2_0_297um_from_obidos PRJNA237345 SRR1782579 PRJNA237344 SRR1790678 river
+amazon_river_2_0_297um_from_tapajos_depth PRJNA237345 SRR1781711 PRJNA237344 SRR1792852 river
+amazon_river_2_0_297um_from_tapajos_surface PRJNA237345 SRR1778024 PRJNA237344 SRR1796118 river
+26 PRJNA502453 SRR8361532 PRJNA467728 SRR7592711 ocean
+58_r PRJNA502454 SRR8361534 PRJNA467772 SRR7609573 ocean
+125_r PRJNA468332 SRR7962480 PRJNA467773 SRR7609569 ocean
+8 PRJNA502455 SRR8297879 PRJNA467724 SRR7592287 ocean
+85 PRJNA502456 SRR8297845 PRJNA467765 SRR7609362 ocean
+76 PRJNA468305 SRR7949679 PRJNA467757 SRR7608731 ocean
+40 PRJNA467774 SRR7609608 PRJNA467736 SRR7595425 ocean
+53 PRJNA468299 SRR7609574 PRJNA502421 SRR8361352 ocean
+73 PRJNA468306 SRR7949683 PRJNA467754 SRR7608223 ocean
+23 PRJNA467775 SRR7609632 PRJNA467727 SRR7633009 ocean
+49 PRJNA467776 SRR7609653 PRJNA502442 SRR8361104 ocean
+14 PRJNA468308 SRR7949682 PRJNA502410 SRR8360542 ocean
+44 PRJNA502457 SRR8361579 PRJNA467738 SRR7600393 ocean
+29 PRJNA468309 SRR7949681 PRJNA467730 SRR7593831 ocean
+18 PRJNA467777 SRR7609655 PRJNA502413 SRR8360544 ocean
+24 PRJNA502458 SRR8361603 PRJNA502440 SRR8360549 ocean
+79 PRJNA468143 SRR7609654 PRJNA467760 SRR7609143 ocean
+22 PRJNA468300 SRR7609599 PRJNA502414 SRR8360545 ocean
+21 PRJNA468310 SRR7949678 PRJNA468208 SRR7592488 ocean
+60 PRJNA467778 SRR7609828 PRJNA502425 SRR8361517 ocean
+15 PRJNA468144 SRR7609827 PRJNA502411 SRR8360457 ocean
+75 PRJNA502459 SRR8361602 PRJNA467756 SRR7608423 ocean
+3 PRJNA468311 SRR7949692 PRJNA467722 SRR7592226 ocean
+20 PRJNA467779 SRR7609829 PRJNA467726 SRR7592489 ocean
+91 PRJNA468312 SRR7949677 PRJNA467769 SRR7609461 ocean
+56 PRJNA468152 SRR7615220 PRJNA467743 SRR7601261 ocean
+46 PRJNA502460 SRR8297843 PRJNA468213 SRR7600395 ocean
+2 PRJNA468313 SRR7609607 PRJNA467721 SRR7592225 ocean
+38 PRJNA468301 SRR7609600 PRJNA468211 SRR7594461 ocean
+45 PRJNA468314 SRR7962234 PRJNA467739 SRR7600394 ocean
+36 PRJNA502461 SRR8361605 PRJNA467733 SRR7594165 ocean
+39 PRJNA502462 SRR8297856 PRJNA467735 SRR7594967 ocean
+68 PRJNA502463 SRR8361604 PRJNA467751 SRR7608008 ocean
+30 PRJNA468315 SRR7962336 PRJNA502416 SRR8361099 ocean
+48 PRJNA502464 SRR8361607 PRJNA467741 SRR7600453 ocean
+42 PRJNA468316 SRR7962340 PRJNA502418 SRR8361100 ocean
+25 PRJNA467780 SRR7609843 PRJNA502415 SRR8361103 ocean
+78 PRJNA468317 SRR7962339 PRJNA467759 SRR7608833 ocean
+63 PRJNA468318 SRR7962244 PRJNA467746 SRR7601694 ocean
+43 PRJNA468302 SRR7815156 PRJNA468212 SRR7600353 ocean
+66 PRJNA467781 SRR7609844 PRJNA467749 SRR7607748 ocean
+54 PRJNA468319 SRR7962337 PRJNA468214 SRR7601174 ocean
+41 PRJNA467782 SRR7609846 PRJNA467737 SRR7599849 ocean
+28 PRJNA468320 SRR7962297 PRJNA467729 SRR7592826 ocean
+32 PRJNA468321 SRR7962267 PRJNA468210 SRR7594064 ocean
+81 PRJNA468145 SRR7610454 PRJNA467762 SRR7609163 ocean
+70 PRJNA468322 SRR7962239 PRJNA467752 SRR7608010 ocean
+67 PRJNA468323 SRR7962329 PRJNA467750 SRR7607757 ocean
+33 PRJNA467783 SRR7610453 PRJNA502417 SRR8361102 ocean
+27 PRJNA468331 SRR7962479 PRJNA468209 SRR7593830 ocean
+34 PRJNA502465 SRR8297868 PRJNA467732 SRR7594085 ocean
+4 PRJNA468146 SRR7610452 PRJNA467723 SRR7592236 ocean
+57 PRJNA468147 SRR7610935 PRJNA502423 SRR8361526 ocean
+1 PRJNA502466 SRR8361608 PRJNA467720 SRR7592212 ocean
+47 PRJNA468148 SRR7610853 PRJNA467740 SRR7600454 ocean
+9 PRJNA502467 SRR8297869 PRJNA502408 SRR8360547 ocean
+77 PRJNA468324 SRR7962474 PRJNA467758 SRR7608832 ocean
+55 PRJNA467784 SRR7610867 PRJNA502422 SRR8361371 ocean
+10 PRJNA468325 SRR7962473 PRJNA502409 SRR8360543 ocean
+11 PRJNA468326 SRR7962475 PRJNA467725 SRR7592321 ocean
+58 PRJNA502468 SRR8297878 PRJNA502424 SRR8361527 ocean
+52 PRJNA468327 SRR7962478 PRJNA467742 SRR7600455 ocean
+51 PRJNA468149 SRR7611057 PRJNA502420 SRR8361350 ocean
+50 PRJNA468328 SRR7962477 PRJNA502419 SRR8361349 ocean
+90 PRJNA467785 SRR7611056 PRJNA467768 SRR7609452 ocean
+80 PRJNA467786 SRR7611058 PRJNA467761 SRR7609166 ocean
+89 PRJNA468150 SRR7611500 PRJNA467766 SRR7609363 ocean
+35 PRJNA467787 SRR7612580 PRJNA502441 SRR8361101 ocean
+71 PRJNA468329 SRR7962476 PRJNA467753 SRR7608017 ocean
+31 PRJNA468151 SRR7611499 PRJNA467731 SRR7593934 ocean
+84 PRJNA502451 SRR8393198 PRJNA467764 SRR7609361 ocean
+61 PRJNA467788 SRR7615219 PRJNA502426 SRR8361516 ocean
+36_r PRJNA468330 SRR7962481 PRJNA467770 SRR7609462 ocean
+82 PRJNA502452 SRR8361531 PRJNA467763 SRR7609344 ocean
+sample_106 PRJNA603240 SRR10968230 PRJNA603240 SRR10968263 deadwood
+sample_069 PRJNA603240 SRR10968231 PRJNA603240 SRR10968264 deadwood
+sample_055 PRJNA603240 SRR10968232 PRJNA603240 SRR10968265 deadwood
+sample_049 PRJNA603240 SRR10968233 PRJNA603240 SRR10968266 deadwood
+sample_031 PRJNA603240 SRR10968234 PRJNA603240 SRR10968267 deadwood
+sample_116 PRJNA603240 SRR10968236 PRJNA603240 SRR10968225 deadwood
+sample_110 PRJNA603240 SRR10968237 PRJNA603240 SRR10968226 deadwood
+sample_044 PRJNA603240 SRR10968238 PRJNA603240 SRR10968227 deadwood
+sample_007 PRJNA603240 SRR10968239 PRJNA603240 SRR10968228 deadwood
+sample_006 PRJNA603240 SRR10968240 PRJNA603240 SRR10968229 deadwood
+Tag1363-1 PRJNA202380 SRR1138694 PRJNA202380 SRR1267595 sheep_rumen
+Tag1111-1 PRJNA202380 SRR1138697 PRJNA202380 SRR1222429 sheep_rumen
+Tag1111-2 PRJNA202380 SRR1138702 PRJNA202380 SRR1222431 sheep_rumen
+Tag1234-1 PRJNA202380 SRR1206249 PRJNA202380 SRR1206671 sheep_rumen
+Tag1494-1 PRJNA202380 SRR873450 PRJNA202380 SRR873595 sheep_rumen
+Tag1283-1 PRJNA202380 SRR873451 PRJNA202380 SRR873596 sheep_rumen
+Tag1435-1 PRJNA202380 SRR873452 PRJNA202380 SRR873597 sheep_rumen
+Tag1494-2 PRJNA202380 SRR873453 PRJNA202380 SRR873598 sheep_rumen
+Tag1265-1 PRJNA202380 SRR873454 PRJNA202380 SRR873599 sheep_rumen
+Tag1435-2 PRJNA202380 SRR873455 PRJNA202380 SRR873600 sheep_rumen
+Tag1283-2 PRJNA202380 SRR873456 PRJNA202380 SRR873601 sheep_rumen
+Tag1265-2 PRJNA202380 SRR873457 PRJNA202380 SRR873602 sheep_rumen
+Tag1363-2 PRJNA202380 SRR873458 PRJNA202380 SRR873603 sheep_rumen
+Tag1174-1 PRJNA202380 SRR873459 PRJNA202380 SRR873604 sheep_rumen
+Tag1234-2 PRJNA202380 SRR873460 PRJNA202380 SRR873605 sheep_rumen
+Tag1586-1 PRJNA202380 SRR873461 PRJNA202380 SRR873606 sheep_rumen
+Tag1586-2 PRJNA202380 SRR873462 PRJNA202380 SRR873607 sheep_rumen
+Tag1333-1 PRJNA202380 SRR873463 PRJNA202380 SRR873608 sheep_rumen
+Tag1174-2 PRJNA202380 SRR873464 PRJNA202380 SRR873609 sheep_rumen
+Tag1333-2 PRJNA202380 SRR873465 PRJNA202380 SRR873610 sheep_rumen
+Ahn.Mel47 PRJNA541981 SRR9118109 PRJNA541981 SRR9033724 human_skin
+Ahn.Mel44 PRJNA541981 SRR9118110 PRJNA541981 SRR9033723 human_skin
+Ahn.Mel46 PRJNA541981 SRR9118111 PRJNA541981 SRR9033725 human_skin
+Ahn.Mel42 PRJNA541981 SRR9118112 PRJNA541981 SRR9033716 human_skin
+Ahn.Mel43 PRJNA541981 SRR9118113 PRJNA541981 SRR9033715 human_skin
+Ahn.Mel37 PRJNA541981 SRR9118114 PRJNA541981 SRR9033753 human_skin
+Ahn.Mel38 PRJNA541981 SRR9118115 PRJNA541981 SRR9033754 human_skin
+Ahn.Mel34 PRJNA541981 SRR9118116 PRJNA541981 SRR9033749 human_skin
+Ahn.Mel35 PRJNA541981 SRR9118117 PRJNA541981 SRR9033750 human_skin
+Ahn.Mel28 PRJNA541981 SRR9118118 PRJNA541981 SRR9033748 human_skin
+Ahn.Mel32 PRJNA541981 SRR9118119 PRJNA541981 SRR9033752 human_skin
+Ahn.Mel19 PRJNA541981 SRR9118120 PRJNA541981 SRR9033727 human_skin
+Ahn.Mel26 PRJNA541981 SRR9118121 PRJNA541981 SRR9033720 human_skin
+Ahn.Mel10 PRJNA541981 SRR9118122 PRJNA541981 SRR9033737 human_skin
+Ahn.Mel13 PRJNA541981 SRR9118123 PRJNA541981 SRR9033742 human_skin
+Ahn.Mel01 PRJNA541981 SRR9118124 PRJNA541981 SRR9033738 human_skin
+Ahn.Mel04 PRJNA541981 SRR9118125 PRJNA541981 SRR9033719 human_skin
+UAB088_W2D1 PRJNA797778 SRR17858151 PRJNA797778 SRR17635672 human_vagina
+UAB082_W10D2 PRJNA797778 SRR17858152 PRJNA797778 SRR17635674 human_vagina
+UAB082_W5D7 PRJNA797778 SRR17858153 PRJNA797778 SRR17635676 human_vagina
+UAB082_W3D7 PRJNA797778 SRR17858154 PRJNA797778 SRR17635677 human_vagina
+UAB082_W2D5 PRJNA797778 SRR17858155 PRJNA797778 SRR17635678 human_vagina
+UAB079_W10D1 PRJNA797778 SRR17858156 PRJNA797778 SRR17635679 human_vagina
+UAB079_W7D6 PRJNA797778 SRR17858157 PRJNA797778 SRR17635680 human_vagina
+UAB079_W6D7 PRJNA797778 SRR17858158 PRJNA797778 SRR17635681 human_vagina
+UAB079_W3D7 PRJNA797778 SRR17858159 PRJNA797778 SRR17635682 human_vagina
+UAB079_W2D2 PRJNA797778 SRR17858160 PRJNA797778 SRR17635683 human_vagina
+UAB006_W2D2 PRJNA797778 SRR17858161 PRJNA797778 SRR17635801 human_vagina
+UAB077_W9D7 PRJNA797778 SRR17858162 PRJNA797778 SRR17635685 human_vagina
+UAB077_W8D2 PRJNA797778 SRR17858163 PRJNA797778 SRR17635686 human_vagina
+UAB077_W5D7 PRJNA797778 SRR17858164 PRJNA797778 SRR17635687 human_vagina
+UAB077_W3D1 PRJNA797778 SRR17858165 PRJNA797778 SRR17635688 human_vagina
+UAB077_W1D7 PRJNA797778 SRR17858166 PRJNA797778 SRR17635689 human_vagina
+UAB071_W10D2 PRJNA797778 SRR17858167 PRJNA797778 SRR17635690 human_vagina
+UAB071_W7D7 PRJNA797778 SRR17858168 PRJNA797778 SRR17635691 human_vagina
+UAB071_W5D7 PRJNA797778 SRR17858169 PRJNA797778 SRR17635692 human_vagina
+UAB071_W3D7 PRJNA797778 SRR17858170 PRJNA797778 SRR17635693 human_vagina
+UAB071_W1D7 PRJNA797778 SRR17858171 PRJNA797778 SRR17635694 human_vagina
+UAB002_W5D7 PRJNA797778 SRR17858172 PRJNA797778 SRR17635804 human_vagina
+AYAC02_W9D3 PRJNA797778 SRR17858173 PRJNA797778 SRR17635818 human_vagina
+UAB060_W10D2 PRJNA797778 SRR17858174 PRJNA797778 SRR17635696 human_vagina
+UAB060_W7D7 PRJNA797778 SRR17858175 PRJNA797778 SRR17635697 human_vagina
+UAB060_W5D7 PRJNA797778 SRR17858176 PRJNA797778 SRR17635698 human_vagina
+UAB060_W3D7 PRJNA797778 SRR17858177 PRJNA797778 SRR17635763 human_vagina
+UAB060_W1D6 PRJNA797778 SRR17858178 PRJNA797778 SRR17635764 human_vagina
+UAB052_W10D2 PRJNA797778 SRR17858179 PRJNA797778 SRR17635765 human_vagina
+UAB052_W8D3 PRJNA797778 SRR17858180 PRJNA797778 SRR17635766 human_vagina
+UAB052_W5D7 PRJNA797778 SRR17858181 PRJNA797778 SRR17635767 human_vagina
+UAB052_W4D2 PRJNA797778 SRR17858182 PRJNA797778 SRR17635768 human_vagina
+UAB002_W1D4 PRJNA797778 SRR17858183 PRJNA797778 SRR17635806 human_vagina
+UAB052_W1D5 PRJNA797778 SRR17858184 PRJNA797778 SRR17635769 human_vagina
+UAB048_W9D7 PRJNA797778 SRR17858185 PRJNA797778 SRR17635771 human_vagina
+UAB048_W7D7 PRJNA797778 SRR17858186 PRJNA797778 SRR17635772 human_vagina
+UAB048_W5D2 PRJNA797778 SRR17858187 PRJNA797778 SRR17635773 human_vagina
+UAB048_W3D7 PRJNA797778 SRR17858188 PRJNA797778 SRR17635774 human_vagina
+UAB048_W1D7 PRJNA797778 SRR17858189 PRJNA797778 SRR17635775 human_vagina
+UAB044_W9D7 PRJNA797778 SRR17858190 PRJNA797778 SRR17635776 human_vagina
+UAB044_W7D7 PRJNA797778 SRR17858191 PRJNA797778 SRR17635777 human_vagina
+UAB044_W5D7 PRJNA797778 SRR17858192 PRJNA797778 SRR17635778 human_vagina
+UAB044_W3D7 PRJNA797778 SRR17858193 PRJNA797778 SRR17635779 human_vagina
+AYAC09_W5D7 PRJNA797778 SRR17858194 PRJNA797778 SRR17635747 human_vagina
+UAB044_W1D7 PRJNA797778 SRR17858195 PRJNA797778 SRR17635780 human_vagina
+UAB028_W9D7 PRJNA797778 SRR17858196 PRJNA797778 SRR17635598 human_vagina
+UAB028_W7D7 PRJNA797778 SRR17858197 PRJNA797778 SRR17635599 human_vagina
+UAB022_W10D1 PRJNA797778 SRR17858198 PRJNA797778 SRR17635466 human_vagina
+UAB022_W6D1 PRJNA797778 SRR17858199 PRJNA797778 SRR17635468 human_vagina
+UAB021_W8D3 PRJNA797778 SRR17858200 PRJNA797778 SRR17635471 human_vagina
+UAB021_W5D7 PRJNA797778 SRR17858201 PRJNA797778 SRR17635472 human_vagina
+UAB015_W10D7 PRJNA797778 SRR17858202 PRJNA797778 SRR17635475 human_vagina
+UAB015_W9D4 PRJNA797778 SRR17858203 PRJNA797778 SRR17635477 human_vagina
+UAB011_W9D7 PRJNA797778 SRR17858204 PRJNA797778 SRR17635481 human_vagina
+AYAC03_W4D7 PRJNA797778 SRR17858205 PRJNA797778 SRR17635796 human_vagina
+UAB011_W7D7 PRJNA797778 SRR17858206 PRJNA797778 SRR17635482 human_vagina
+UAB011_W7D1 PRJNA797778 SRR17858207 PRJNA797778 SRR17635483 human_vagina
+UAB039_W9D7 PRJNA797778 SRR17858208 PRJNA797778 SRR17635782 human_vagina
+UAB039_W7D7 PRJNA797778 SRR17858209 PRJNA797778 SRR17635783 human_vagina
+UAB039_W5D7 PRJNA797778 SRR17858210 PRJNA797778 SRR17635784 human_vagina
+UAB039_W3D6 PRJNA797778 SRR17858211 PRJNA797778 SRR17635785 human_vagina
+UAB039_W1D7 PRJNA797778 SRR17858212 PRJNA797778 SRR17635786 human_vagina
+UAB038_W9D7 PRJNA797778 SRR17858213 PRJNA797778 SRR17635787 human_vagina
+UAB038_W7D4 PRJNA797778 SRR17858214 PRJNA797778 SRR17635788 human_vagina
+UAB038_W5D7 PRJNA797778 SRR17858215 PRJNA797778 SRR17635789 human_vagina
+AYAC03_W1D7 PRJNA797778 SRR17858216 PRJNA797778 SRR17635807 human_vagina
+UAB038_W3D7 PRJNA797778 SRR17858217 PRJNA797778 SRR17635790 human_vagina
+UAB038_W2D2 PRJNA797778 SRR17858218 PRJNA797778 SRR17635791 human_vagina
+UAB035_W9D7 PRJNA797778 SRR17858219 PRJNA797778 SRR17635593 human_vagina
+UAB035_W7D4 PRJNA797778 SRR17858220 PRJNA797778 SRR17635594 human_vagina
+UAB035_W5D7 PRJNA797778 SRR17858221 PRJNA797778 SRR17635595 human_vagina
+UAB035_W3D6 PRJNA797778 SRR17858222 PRJNA797778 SRR17635596 human_vagina
+UAB035_W2D7 PRJNA797778 SRR17858223 PRJNA797778 SRR17635597 human_vagina
+AYAC03_W7D5 PRJNA797778 SRR17858224 PRJNA797778 SRR17635487 human_vagina
+AYAC02_W7D5 PRJNA797778 SRR17858225 PRJNA797778 SRR17635635 human_vagina
+UAB010_W7D5 PRJNA797778 SRR17858226 PRJNA797778 SRR17635489 human_vagina
+AYAC02_W5D6 PRJNA797778 SRR17858227 PRJNA797778 SRR17635576 human_vagina
+UAB007_W9D7 PRJNA797778 SRR17858228 PRJNA797778 SRR17635493 human_vagina
+UAB007_W7D7 PRJNA797778 SRR17858229 PRJNA797778 SRR17635494 human_vagina
+UAB007_W6D3 PRJNA797778 SRR17858230 PRJNA797778 SRR17635495 human_vagina
+UAB006_W9D7 PRJNA797778 SRR17858231 PRJNA797778 SRR17635797 human_vagina
+UAB006_W7D7 PRJNA797778 SRR17858232 PRJNA797778 SRR17635798 human_vagina
+UAB002_W9D7 PRJNA797778 SRR17858233 PRJNA797778 SRR17635802 human_vagina
+UAB002_W7D7 PRJNA797778 SRR17858234 PRJNA797778 SRR17635803 human_vagina
+UAB028_W5D5 PRJNA797778 SRR17858235 PRJNA797778 SRR17635600 human_vagina
+UAB028_W3D7 PRJNA797778 SRR17858236 PRJNA797778 SRR17635601 human_vagina
+UAB028_W1D7 PRJNA797778 SRR17858237 PRJNA797778 SRR17635602 human_vagina
+AYAC02_W3D1 PRJNA797778 SRR17858238 PRJNA797778 SRR17635793 human_vagina
+UAB022_W4D1 PRJNA797778 SRR17858239 PRJNA797778 SRR17635469 human_vagina
+UAB022_W2D1 PRJNA797778 SRR17858240 PRJNA797778 SRR17635470 human_vagina
+AYAC10_W9D5 PRJNA797778 SRR17858241 PRJNA797778 SRR17635531 human_vagina
+AYAC10_W8D1 PRJNA797778 SRR17858242 PRJNA797778 SRR17635542 human_vagina
+AYAC10_W5D4 PRJNA797778 SRR17858243 PRJNA797778 SRR17635553 human_vagina
+AYAC10_W2D7 PRJNA797778 SRR17858244 PRJNA797778 SRR17635564 human_vagina
+AYAC10_W1D7 PRJNA797778 SRR17858245 PRJNA797778 SRR17635575 human_vagina
+AYAC09_W9D7 PRJNA797778 SRR17858246 PRJNA797778 SRR17635587 human_vagina
+AYAC09_W7D4 PRJNA797778 SRR17858247 PRJNA797778 SRR17635736 human_vagina
+EM11_W9D7 PRJNA797778 SRR17858248 PRJNA797778 SRR17635808 human_vagina
+EM11_W7D7 PRJNA797778 SRR17858249 PRJNA797778 SRR17635809 human_vagina
+EM11_W5D7 PRJNA797778 SRR17858250 PRJNA797778 SRR17635810 human_vagina
+UAB021_W3D7 PRJNA797778 SRR17858251 PRJNA797778 SRR17635473 human_vagina
+EM11_W3D6 PRJNA797778 SRR17858252 PRJNA797778 SRR17635811 human_vagina
+EM11_W1D7 PRJNA797778 SRR17858253 PRJNA797778 SRR17635812 human_vagina
+EM10_W8D7 PRJNA797778 SRR17858254 PRJNA797778 SRR17635813 human_vagina
+EM10_W6D5 PRJNA797778 SRR17858255 PRJNA797778 SRR17635814 human_vagina
+EM10_W4D7 PRJNA797778 SRR17858256 PRJNA797778 SRR17635815 human_vagina
+EM10_W3D2 PRJNA797778 SRR17858257 PRJNA797778 SRR17635816 human_vagina
+EM10_W1D7 PRJNA797778 SRR17858258 PRJNA797778 SRR17635817 human_vagina
+EM09_W9D6 PRJNA797778 SRR17858259 PRJNA797778 SRR17635819 human_vagina
+EM09_W8D5 PRJNA797778 SRR17858260 PRJNA797778 SRR17635820 human_vagina
+EM09_W6D7 PRJNA797778 SRR17858261 PRJNA797778 SRR17635459 human_vagina
+UAB015_W3D1 PRJNA797778 SRR17858262 PRJNA797778 SRR17635479 human_vagina
+AYAC05_W9D7 PRJNA797778 SRR17858263 PRJNA797778 SRR17635684 human_vagina
+AYAC05_W7D7 PRJNA797778 SRR17858264 PRJNA797778 SRR17635695 human_vagina
+AYAC03_W9D7 PRJNA797778 SRR17858265 PRJNA797778 SRR17635465 human_vagina
+AYAC03_W8D6 PRJNA797778 SRR17858266 PRJNA797778 SRR17635476 human_vagina
+AYAC09_W3D4 PRJNA797778 SRR17858267 PRJNA797778 SRR17635758 human_vagina
+AYAC09_W1D7 PRJNA797778 SRR17858268 PRJNA797778 SRR17635673 human_vagina
+AYAC05_W5D5 PRJNA797778 SRR17858269 PRJNA797778 SRR17635770 human_vagina
+AYAC05_W3D7 PRJNA797778 SRR17858270 PRJNA797778 SRR17635781 human_vagina
+AYAC05_W1D6 PRJNA797778 SRR17858271 PRJNA797778 SRR17635792 human_vagina
+EM09_W4D4 PRJNA797778 SRR17858272 PRJNA797778 SRR17635462 human_vagina
+UAB015_W1D5 PRJNA797778 SRR17858273 PRJNA797778 SRR17635480 human_vagina
+EM09_W2D3 PRJNA797778 SRR17858274 PRJNA797778 SRR17635611 human_vagina
+EM04_W1D7 PRJNA797778 SRR17858275 PRJNA797778 SRR17635722 human_vagina
+UAB129_W10D2 PRJNA797778 SRR17858276 PRJNA797778 SRR17635577 human_vagina
+UAB129_W7D7 PRJNA797778 SRR17858277 PRJNA797778 SRR17635578 human_vagina
+UAB129_W5D7 PRJNA797778 SRR17858278 PRJNA797778 SRR17635579 human_vagina
+UAB129_W3D3 PRJNA797778 SRR17858279 PRJNA797778 SRR17635580 human_vagina
+UAB129_W1D7 PRJNA797778 SRR17858280 PRJNA797778 SRR17635581 human_vagina
+UAB122_W9D6 PRJNA797778 SRR17858281 PRJNA797778 SRR17635582 human_vagina
+UAB122_W6D5 PRJNA797778 SRR17858282 PRJNA797778 SRR17635583 human_vagina
+UAB122_W4D7 PRJNA797778 SRR17858283 PRJNA797778 SRR17635584 human_vagina
+UAB011_W3D6 PRJNA797778 SRR17858284 PRJNA797778 SRR17635484 human_vagina
+UAB122_W3D4 PRJNA797778 SRR17858285 PRJNA797778 SRR17635585 human_vagina
+UAB122_W1D7 PRJNA797778 SRR17858286 PRJNA797778 SRR17635586 human_vagina
+UAB119_W9D7 PRJNA797778 SRR17858287 PRJNA797778 SRR17635588 human_vagina
+UAB119_W7D7 PRJNA797778 SRR17858288 PRJNA797778 SRR17635589 human_vagina
+UAB119_W5D7 PRJNA797778 SRR17858289 PRJNA797778 SRR17635590 human_vagina
+UAB119_W3D7 PRJNA797778 SRR17858290 PRJNA797778 SRR17635591 human_vagina
+UAB119_W1D7 PRJNA797778 SRR17858291 PRJNA797778 SRR17635592 human_vagina
+UAB117_W9D6 PRJNA797778 SRR17858292 PRJNA797778 SRR17635731 human_vagina
+UAB117_W8D1 PRJNA797778 SRR17858293 PRJNA797778 SRR17635732 human_vagina
+UAB117_W5D6 PRJNA797778 SRR17858294 PRJNA797778 SRR17635733 human_vagina
+UAB011_W1D7 PRJNA797778 SRR17858295 PRJNA797778 SRR17635485 human_vagina
+UAB117_W3D6 PRJNA797778 SRR17858296 PRJNA797778 SRR17635734 human_vagina
+UAB117_W1D6 PRJNA797778 SRR17858297 PRJNA797778 SRR17635735 human_vagina
+UAB116_W9D6 PRJNA797778 SRR17858298 PRJNA797778 SRR17635737 human_vagina
+UAB116_W6D7 PRJNA797778 SRR17858299 PRJNA797778 SRR17635738 human_vagina
+UAB116_W4D6 PRJNA797778 SRR17858300 PRJNA797778 SRR17635739 human_vagina
+UAB116_W1D7 PRJNA797778 SRR17858301 PRJNA797778 SRR17635741 human_vagina
+UAB110_W10D4 PRJNA797778 SRR17858302 PRJNA797778 SRR17635742 human_vagina
+UAB110_W7D7 PRJNA797778 SRR17858303 PRJNA797778 SRR17635743 human_vagina
+UAB110_W6D2 PRJNA797778 SRR17858304 PRJNA797778 SRR17635744 human_vagina
+UAB110_W3D7 PRJNA797778 SRR17858305 PRJNA797778 SRR17635745 human_vagina
+UAB010_W5D6 PRJNA797778 SRR17858306 PRJNA797778 SRR17635490 human_vagina
+UAB110_W1D7 PRJNA797778 SRR17858307 PRJNA797778 SRR17635746 human_vagina
+UAB106_W9D7 PRJNA797778 SRR17858308 PRJNA797778 SRR17635748 human_vagina
+UAB106_W7D7 PRJNA797778 SRR17858309 PRJNA797778 SRR17635749 human_vagina
+UAB106_W5D4 PRJNA797778 SRR17858310 PRJNA797778 SRR17635750 human_vagina
+UAB106_W3D7 PRJNA797778 SRR17858311 PRJNA797778 SRR17635751 human_vagina
+UAB106_W1D7 PRJNA797778 SRR17858312 PRJNA797778 SRR17635752 human_vagina
+UAB096_W9D7 PRJNA797778 SRR17858313 PRJNA797778 SRR17635753 human_vagina
+UAB096_W7D3 PRJNA797778 SRR17858314 PRJNA797778 SRR17635754 human_vagina
+UAB096_W5D3 PRJNA797778 SRR17858315 PRJNA797778 SRR17635755 human_vagina
+UAB096_W3D3 PRJNA797778 SRR17858316 PRJNA797778 SRR17635756 human_vagina
+UAB010_W3D1 PRJNA797778 SRR17858317 PRJNA797778 SRR17635491 human_vagina
+UAB096_W1D5 PRJNA797778 SRR17858318 PRJNA797778 SRR17635757 human_vagina
+UAB093_W9D6 PRJNA797778 SRR17858319 PRJNA797778 SRR17635759 human_vagina
+UAB093_W7D7 PRJNA797778 SRR17858320 PRJNA797778 SRR17635760 human_vagina
+UAB093_W5D7 PRJNA797778 SRR17858321 PRJNA797778 SRR17635761 human_vagina
+UAB093_W4D2 PRJNA797778 SRR17858322 PRJNA797778 SRR17635762 human_vagina
+UAB093_W1D7 PRJNA797778 SRR17858323 PRJNA797778 SRR17635667 human_vagina
+UAB088_W10D1 PRJNA797778 SRR17858324 PRJNA797778 SRR17635668 human_vagina
+UAB088_W7D6 PRJNA797778 SRR17858325 PRJNA797778 SRR17635669 human_vagina
+UAB088_W6D1 PRJNA797778 SRR17858326 PRJNA797778 SRR17635670 human_vagina
+UAB088_W4D1 PRJNA797778 SRR17858327 PRJNA797778 SRR17635671 human_vagina
+UAB007_W1D7 PRJNA797778 SRR17858328 PRJNA797778 SRR17635795 human_vagina
+AYAC02_W1D7 PRJNA797778 SRR17858329 PRJNA797778 SRR17635794 human_vagina
+EM04_W3D7 PRJNA797778 SRR17858330 PRJNA797778 SRR17635711 human_vagina
+MV_FEI4_t2Q15 PRJNA339914 SRR4100706 PRJNA339914 SRR4052039 human_gut
+MV_FEI5_t3Q15 PRJNA339914 SRR4100707 PRJNA339914 SRR4052042 human_gut
+MV_FEM4_t2Q15 PRJNA339914 SRR4100708 PRJNA339914 SRR4052025 human_gut
+MV_FEM5_t3Q15 PRJNA339914 SRR4100709 PRJNA339914 SRR4052028 human_gut
+M1 PRJEB33889 ERR3473664 PRJEB33889 ERR3473656 mouse_cecum
+M2 PRJEB33889 ERR3473665 PRJEB33889 ERR3473657 mouse_cecum
+M3 PRJEB33889 ERR3473666 PRJEB33889 ERR3473658 mouse_cecum
+M4 PRJEB33889 ERR3473667 PRJEB33889 ERR3473659 mouse_cecum
+M5 PRJEB33889 ERR3473668 PRJEB33889 ERR3473660 mouse_cecum
+M6 PRJEB33889 ERR3473669 PRJEB33889 ERR3473661 mouse_cecum
+M7 PRJEB33889 ERR3473670 PRJEB33889 ERR3473662 mouse_cecum
+M8 PRJEB33889 ERR3473671 PRJEB33889 ERR3473663 mouse_cecum
+ad_supplemented_with_nitrogen_data_2 PRJNA698464 SRR13618127 PRJNA698464 SRR13618123 bioreactor
+ad_supplemented_with_nitrogen_data_1 PRJNA698464 SRR13618128 PRJNA698464 SRR13618124 bioreactor
+ad_supplemented_with_hydrogen_data_2 PRJNA698464 SRR13618129 PRJNA698464 SRR13618125 bioreactor
+ad_supplemented_with_hydrogen_data_1 PRJNA698464 SRR13618130 PRJNA698464 SRR13618126 bioreactor
+X4074 PRJNA396840 SRR5892181 PRJNA396840 SRR5892217 human_oral
+X4080 PRJNA396840 SRR5892182 PRJNA396840 SRR5892216 human_oral
+X4068 PRJNA396840 SRR5892183 PRJNA396840 SRR5892215 human_oral
+X4072 PRJNA396840 SRR5892184 PRJNA396840 SRR5892214 human_oral
+X4060 PRJNA396840 SRR5892185 PRJNA396840 SRR5892213 human_oral
+X4064 PRJNA396840 SRR5892186 PRJNA396840 SRR5892212 human_oral
+X4050 PRJNA396840 SRR5892187 PRJNA396840 SRR5892211 human_oral
+X4056 PRJNA396840 SRR5892188 PRJNA396840 SRR5892210 human_oral
+X4108 PRJNA396840 SRR5892189 PRJNA396840 SRR5892233 human_oral
+X4082 PRJNA396840 SRR5892190 PRJNA396840 SRR5892209 human_oral
+X4088 PRJNA396840 SRR5892191 PRJNA396840 SRR5892208 human_oral
+X4124 PRJNA396840 SRR5892192 PRJNA396840 SRR5892236 human_oral
+X4120 PRJNA396840 SRR5892195 PRJNA396840 SRR5892235 human_oral
+X4096 PRJNA396840 SRR5892200 PRJNA396840 SRR5892238 human_oral
+X4092 PRJNA396840 SRR5892204 PRJNA396840 SRR5892237 human_oral
+X4104 PRJNA396840 SRR5892205 PRJNA396840 SRR5892240 human_oral
+X4100 PRJNA396840 SRR5892207 PRJNA396840 SRR5892239 human_oral
+X4024 PRJNA396840 SRR5892218 PRJNA396840 SRR5892196 human_oral
+X4020 PRJNA396840 SRR5892219 PRJNA396840 SRR5892197 human_oral
+X4014 PRJNA396840 SRR5892220 PRJNA396840 SRR5892198 human_oral
+X4012 PRJNA396840 SRR5892221 PRJNA396840 SRR5892199 human_oral
+X4040 PRJNA396840 SRR5892222 PRJNA396840 SRR5892206 human_oral
+X4036 PRJNA396840 SRR5892223 PRJNA396840 SRR5892201 human_oral
+X4032 PRJNA396840 SRR5892224 PRJNA396840 SRR5892202 human_oral
+X4028 PRJNA396840 SRR5892225 PRJNA396840 SRR5892203 human_oral
+X4048 PRJNA396840 SRR5892226 PRJNA396840 SRR5892193 human_oral
+X4044 PRJNA396840 SRR5892227 PRJNA396840 SRR5892194 human_oral
+X4132 PRJNA396840 SRR5892228 PRJNA396840 SRR5892232 human_oral
+X4128 PRJNA396840 SRR5892229 PRJNA396840 SRR5892231 human_oral
+X4116 PRJNA396840 SRR5892230 PRJNA396840 SRR5892234 human_oral
+L2 PRJNA492158 SRR7880350 PRJNA492158 SRR7880198 human_gut
+L3 PRJNA492158 SRR7880351 PRJNA492158 SRR7880201 human_gut
+L5 PRJNA492158 SRR7880352 PRJNA492158 SRR7880203 human_gut
+L6 PRJNA492158 SRR7880353 PRJNA492158 SRR7880202 human_gut
+L7 PRJNA492158 SRR7880354 PRJNA492158 SRR7880205 human_gut
+L8 PRJNA492158 SRR7880355 PRJNA492158 SRR7880204 human_gut
+L9 PRJNA492158 SRR7880356 PRJNA492158 SRR7880207 human_gut
+L11 PRJNA492158 SRR7880357 PRJNA492158 SRR7880227 human_gut
+L12 PRJNA492158 SRR7880358 PRJNA492158 SRR7880226 human_gut
+O1 PRJNA492158 SRR7880359 PRJNA492158 SRR7880225 human_gut
+V6 PRJNA492158 SRR7880360 PRJNA492158 SRR7880217 human_gut
+V5 PRJNA492158 SRR7880361 PRJNA492158 SRR7880216 human_gut
+V8 PRJNA492158 SRR7880362 PRJNA492158 SRR7880218 human_gut
+V7 PRJNA492158 SRR7880363 PRJNA492158 SRR7880219 human_gut
+V11 PRJNA492158 SRR7880364 PRJNA492158 SRR7880223 human_gut
+V9 PRJNA492158 SRR7880365 PRJNA492158 SRR7880197 human_gut
+O6 PRJNA492158 SRR7880366 PRJNA492158 SRR7880228 human_gut
+O8 PRJNA492158 SRR7880367 PRJNA492158 SRR7880220 human_gut
+O9 PRJNA492158 SRR7880368 PRJNA492158 SRR7880210 human_gut
+O10 PRJNA492158 SRR7880369 PRJNA492158 SRR7880211 human_gut
+O2 PRJNA492158 SRR7880370 PRJNA492158 SRR7880224 human_gut
+O3 PRJNA492158 SRR7880371 PRJNA492158 SRR7880231 human_gut
+O4 PRJNA492158 SRR7880372 PRJNA492158 SRR7880230 human_gut
+O5 PRJNA492158 SRR7880373 PRJNA492158 SRR7880229 human_gut
+O11 PRJNA492158 SRR7880374 PRJNA492158 SRR7880208 human_gut
+O12 PRJNA492158 SRR7880375 PRJNA492158 SRR7880209 human_gut
+3m_Station6_GOM PRJNA278075 SRR1918203 PRJNA278075 SRR2001210 ocean
+CRF2_7 PRJEB38017 ERR4077213 PRJEB38017 ERR4073751 cocoa_box_fermentation
+CRF2_20 PRJEB38017 ERR4077214 PRJEB38017 ERR4073752 cocoa_box_fermentation
+CRF2_68 PRJEB38017 ERR4077215 PRJEB38017 ERR4073754 cocoa_box_fermentation
+HP_AsLow PRJNA616041 SRR11450577 PRJNA616041 SRR11450583 paddy_soil
+SKS_AsHig PRJNA616041 SRR11450578 PRJNA616041 SRR11450584 paddy_soil
+CZ_AsHig PRJNA616041 SRR11450579 PRJNA616041 SRR11450587 paddy_soil
+CL_AsHig PRJNA616041 SRR11450580 PRJNA616041 SRR11450588 paddy_soil
+YCP_AsLow PRJNA616041 SRR11450585 PRJNA616041 SRR11450581 paddy_soil
+LH_AsLow PRJNA616041 SRR11450586 PRJNA616041 SRR11450582 paddy_soil
+GZ-C-35-D5 PRJNA395125 SRR5868086 PRJNA393770 SRR5892338 bioreactor
+GZ-C-35-Mid PRJNA395125 SRR5868087 PRJNA393770 SRR5892352 bioreactor
+SWH-C-35-D5 PRJNA395125 SRR5868115 PRJNA393770 SRR5894891 bioreactor
+SWH-C-35-Mid PRJNA395125 SRR5868116 PRJNA393770 SRR5894892 bioreactor
+GZ-X-35-Mid PRJNA395125 SRR5868138 PRJNA393770 SRR5894899 bioreactor
+SWH-X-35-Mid PRJNA395125 SRR5868139 PRJNA393770 SRR5894900 bioreactor
+SWH-C-55-Mid PRJNA395125 SRR5868141 PRJNA393770 SRR5894893 bioreactor
+AH1 PRJEB12284 ERR1198915 PRJEB12083 ERR1191817 wastewater
+AH4 PRJEB12284 ERR1198916 PRJEB12083 ERR1191820 wastewater
+AH5 PRJEB12284 ERR1198917 PRJEB12083 ERR1191821 wastewater
+AH6 PRJEB12284 ERR1198918 PRJEB12083 ERR1191822 wastewater
+DF1 PRJEB12284 ERR1198919 PRJEB12083 ERR1193331 wastewater
+DF4 PRJEB12284 ERR1198920 PRJEB12083 ERR1193299 wastewater
+DF5 PRJEB12284 ERR1198921 PRJEB12083 ERR1193300 wastewater
+DF6 PRJEB12284 ERR1198922 PRJEB12083 ERR1193301 wastewater
+BazoSym_LS-ET_5 PRJEB32788 ERR3342479 PRJEB32787 ERR3342499 mussel_gill
+BazoSym_LS-ET_4 PRJEB32788 ERR3342480 PRJEB32787 ERR3342500 mussel_gill
+BazoSym_LS-MS_3 PRJEB32788 ERR3342481 PRJEB32787 ERR3342501 mussel_gill
+BazoSym_LS-MS_2 PRJEB32788 ERR3342482 PRJEB32787 ERR3342502 mussel_gill
+BazoSym_LS-MS_1 PRJEB32788 ERR3342483 PRJEB32787 ERR3342503 mussel_gill
+BspSym_Li_5 PRJEB32788 ERR3342467 PRJEB32787 ERR3342486 mussel_gill
+BspSym_Li_4 PRJEB32788 ERR3342468 PRJEB32787 ERR3342487 mussel_gill
+BspSym_Li_3 PRJEB32788 ERR3342469 PRJEB32787 ERR3342488 mussel_gill
+BspSym_Li_2 PRJEB32788 ERR3342470 PRJEB32787 ERR3342489 mussel_gill
+BspSym_Li_1 PRJEB32788 ERR3342471 PRJEB32787 ERR3342490 mussel_gill
+BspSym_Cl_4 PRJEB32788 ERR3342472 PRJEB32787 ERR3342492 mussel_gill
+BspSym_Cl_3 PRJEB32788 ERR3342473 PRJEB32787 ERR3342493 mussel_gill
+BspSym_Cl_2 PRJEB32788 ERR3342474 PRJEB32787 ERR3342494 mussel_gill
+BspSym_Cl_1 PRJEB32788 ERR3342475 PRJEB32787 ERR3342495 mussel_gill
+BputSym_Se-AL_3 PRJEB32788 ERR3342476 PRJEB32787 ERR3342496 mussel_gill
+BputSym_Se-AL_2 PRJEB32788 ERR3342477 PRJEB32787 ERR3342497 mussel_gill
+BputSym_Se-AL_1 PRJEB32788 ERR3342478 PRJEB32787 ERR3342498 mussel_gill
+CSM67UC6 PRJNA398089 SRR5949109 PRJNA398089 SRR5936217 human_gut
+CSM67UDY PRJNA398089 SRR5949110 PRJNA398089 SRR5936216 human_gut
+HSM67VI9 PRJNA398089 SRR5949111 PRJNA398089 SRR5936212 human_gut
+HSM6XRTQ PRJNA398089 SRR5949112 PRJNA398089 SRR5936211 human_gut
+CSM79HHM PRJNA398089 SRR5949113 PRJNA398089 SRR5946811 human_gut
+CSM79HIR PRJNA398089 SRR5949114 PRJNA398089 SRR5936215 human_gut
+CSM79HJO PRJNA398089 SRR5949115 PRJNA398089 SRR5947089 human_gut
+HSM67VEI PRJNA398089 SRR5949116 PRJNA398089 SRR5936210 human_gut
+MSM79H9K PRJNA398089 SRR5949117 PRJNA398089 SRR5935975 human_gut
+MSM79HAH PRJNA398089 SRR5949118 PRJNA398089 SRR5935976 human_gut
+HSM7J4L5 PRJNA398089 SRR5949119 PRJNA398089 SRR5935953 human_gut
+MSM79H7E PRJNA398089 SRR5949120 PRJNA398089 SRR5935950 human_gut
+PSM7J17Z PRJNA398089 SRR5949121 PRJNA398089 SRR5935951 human_gut
+PSM7J18G PRJNA398089 SRR5949122 PRJNA398089 SRR5935952 human_gut
+MSM79HDI PRJNA398089 SRR5949123 PRJNA398089 SRR5935954 human_gut
+HSM7J4JH PRJNA398089 SRR5949124 PRJNA398089 SRR5935955 human_gut
+HSM7J4HU PRJNA398089 SRR5949125 PRJNA398089 SRR5935956 human_gut
+PSM7J1CI PRJNA398089 SRR5949126 PRJNA398089 SRR5935957 human_gut
+HSM7CZ36 PRJNA398089 SRR5949127 PRJNA398089 SRR5936016 human_gut
+CSM7KOJW PRJNA398089 SRR5949128 PRJNA398089 SRR5947102 human_gut
+MSM79HDM PRJNA398089 SRR5949129 PRJNA398089 SRR5936018 human_gut
+PSM6XBTP PRJNA398089 SRR5949130 PRJNA398089 SRR5936066 human_gut
+MSM79HF9 PRJNA398089 SRR5949131 PRJNA398089 SRR5936064 human_gut
+CSM79HQF PRJNA398089 SRR5949132 PRJNA398089 SRR5936119 human_gut
+MSM79H9A PRJNA398089 SRR5949133 PRJNA398089 SRR5936118 human_gut
+HSM7CZ3E PRJNA398089 SRR5949134 PRJNA398089 SRR5936121 human_gut
+CSM79HP4 PRJNA398089 SRR5949135 PRJNA398089 SRR5946645 human_gut
+HSM7CYY7 PRJNA398089 SRR5949136 PRJNA398089 SRR5936117 human_gut
+CSM79HLC PRJNA398089 SRR5949137 PRJNA398089 SRR5936176 human_gut
+MSM79H5K PRJNA398089 SRR5949138 PRJNA398089 SRR5936132 human_gut
+HSM6XRV8 PRJNA398089 SRR5949139 PRJNA398089 SRR5935785 human_gut
+CSM79HIZ PRJNA398089 SRR5949140 PRJNA398089 SRR5935786 human_gut
+CSM79HIB PRJNA398089 SRR5949141 PRJNA398089 SRR5935794 human_gut
+HSM7CZ1A PRJNA398089 SRR5949142 PRJNA398089 SRR5935798 human_gut
+CSM79HIN PRJNA398089 SRR5949143 PRJNA398089 SRR5935792 human_gut
+MSM79H6F PRJNA398089 SRR5949144 PRJNA398089 SRR5935793 human_gut
+HSM6XRVC PRJNA398089 SRR5949145 PRJNA398089 SRR5936043 human_gut
+PSM7J199 PRJNA398089 SRR5949146 PRJNA398089 SRR5936131 human_gut
+HSM7CYX6 PRJNA398089 SRR5949147 PRJNA398089 SRR5936134 human_gut
+CSM7KORM PRJNA398089 SRR5949148 PRJNA398089 SRR5950794 human_gut
+PSM7J12J PRJNA398089 SRR5949149 PRJNA398089 SRR5936100 human_gut
+ESM5MEBS PRJNA398089 SRR5949150 PRJNA398089 SRR5935857 human_gut
+MSM6J2IE PRJNA398089 SRR5949151 PRJNA398089 SRR5935858 human_gut
+MSM5LLDS PRJNA398089 SRR5949152 PRJNA398089 SRR5935763 human_gut
+CSM7KOJY PRJNA398089 SRR5949153 PRJNA398089 SRR5946685 human_gut
+HSM5FZBZ PRJNA398089 SRR5949154 PRJNA398089 SRR5946845 human_gut
+CSM5FZ4M PRJNA398089 SRR5949155 PRJNA398089 SRR5935761 human_gut
+HSM5MD5B PRJNA398089 SRR5949156 PRJNA398089 SRR5935768 human_gut
+HSM5MD53 PRJNA398089 SRR5949157 PRJNA398089 SRR5947002 human_gut
+CSM5MCUO PRJNA398089 SRR5949158 PRJNA398089 SRR5935859 human_gut
+CSM5MCXD PRJNA398089 SRR5949159 PRJNA398089 SRR5935860 human_gut
+PSM6XBS4 PRJNA398089 SRR5949160 PRJNA398089 SRR5935964 human_gut
+HSM6XRUN PRJNA398089 SRR5949161 PRJNA398089 SRR5935816 human_gut
+MSM6J2QP PRJNA398089 SRR5949162 PRJNA398089 SRR5935817 human_gut
+CSM67UEW PRJNA398089 SRR5949163 PRJNA398089 SRR5935962 human_gut
+HSM6XRQO PRJNA398089 SRR5949164 PRJNA398089 SRR5936154 human_gut
+HSM67VD4 PRJNA398089 SRR5949165 PRJNA398089 SRR5936160 human_gut
+HSM67VI3 PRJNA398089 SRR5949166 PRJNA398089 SRR5936159 human_gut
+CSM67UEA PRJNA398089 SRR5949167 PRJNA398089 SRR5936162 human_gut
+HSM67VES PRJNA398089 SRR5949168 PRJNA398089 SRR5935813 human_gut
+HSM6XRTM PRJNA398089 SRR5949169 PRJNA398089 SRR5935815 human_gut
+ESM5MEC5 PRJNA398089 SRR5949170 PRJNA398089 SRR5935758 human_gut
+CSM67UAW PRJNA398089 SRR5949171 PRJNA398089 SRR5935755 human_gut
+HSM6XRQU PRJNA398089 SRR5949172 PRJNA398089 SRR5935901 human_gut
+CSM67UBH PRJNA398089 SRR5949173 PRJNA398089 SRR5935900 human_gut
+CSM67UAM PRJNA398089 SRR5949174 PRJNA398089 SRR5935904 human_gut
+CSM67UAA PRJNA398089 SRR5949175 PRJNA398089 SRR5935903 human_gut
+CSM67UDF PRJNA398089 SRR5949176 PRJNA398089 SRR5935908 human_gut
+MSM6J2Q3 PRJNA398089 SRR5949177 PRJNA398089 SRR5935907 human_gut
+MSM6J2LL PRJNA398089 SRR5949178 PRJNA398089 SRR5935899 human_gut
+ESM5MEBG PRJNA398089 SRR5949179 PRJNA398089 SRR5935902 human_gut
+MSM6J2PU PRJNA398089 SRR5949180 PRJNA398089 SRR5935892 human_gut
+HSM7CYZV PRJNA398089 SRR5949181 PRJNA398089 SRR5935891 human_gut
+CSM79HKB PRJNA398089 SRR5949182 PRJNA398089 SRR5935890 human_gut
+HSM7CYZJ PRJNA398089 SRR5949183 PRJNA398089 SRR5935889 human_gut
+CSM79HLG PRJNA398089 SRR5949184 PRJNA398089 SRR5936005 human_gut
+CSM79HI7 PRJNA398089 SRR5949185 PRJNA398089 SRR5935894 human_gut
+CSM79HIV PRJNA398089 SRR5949186 PRJNA398089 SRR5935895 human_gut
+HSM67VGG PRJNA398089 SRR5949187 PRJNA398089 SRR5935893 human_gut
+MSM6J2MH PRJNA398089 SRR5949188 PRJNA398089 SRR5935888 human_gut
+CSM79HGZ PRJNA398089 SRR5949189 PRJNA398089 SRR5946660 human_gut
+MSM6J2QF PRJNA398089 SRR5949190 PRJNA398089 SRR5936052 human_gut
+CSM79HLA PRJNA398089 SRR5949191 PRJNA398089 SRR5936051 human_gut
+HSM67VHD PRJNA398089 SRR5949192 PRJNA398089 SRR5936232 human_gut
+HSM6XRQ8 PRJNA398089 SRR5949193 PRJNA398089 SRR5936229 human_gut
+CSM67UF5 PRJNA398089 SRR5949194 PRJNA398089 SRR5936234 human_gut
+MSM6J2RS PRJNA398089 SRR5949195 PRJNA398089 SRR5936238 human_gut
+HSM6XRVM PRJNA398089 SRR5949196 PRJNA398089 SRR5936226 human_gut
+HSM67VEK PRJNA398089 SRR5949197 PRJNA398089 SRR5936230 human_gut
+HSM67VHW PRJNA398089 SRR5949198 PRJNA398089 SRR5936227 human_gut
+HSM67VFR PRJNA398089 SRR5949199 PRJNA398089 SRR5936228 human_gut
+CSM7KONK PRJNA398089 SRR5949200 PRJNA398089 SRR5950508 human_gut
+MSM9VZEK PRJNA398089 SRR5949201 PRJNA398089 SRR5935847 human_gut
+MSM79H8D PRJNA398089 SRR5949202 PRJNA398089 SRR5935843 human_gut
+CSM7KOSV PRJNA398089 SRR5949203 PRJNA398089 SRR5950509 human_gut
+HSM7J4IC PRJNA398089 SRR5949204 PRJNA398089 SRR5935838 human_gut
+CSM7KON8 PRJNA398089 SRR5949205 PRJNA398089 SRR5935837 human_gut
+HSM7J4L9 PRJNA398089 SRR5949206 PRJNA398089 SRR5935846 human_gut
+HSM7J4IQ PRJNA398089 SRR5949207 PRJNA398089 SRR5935842 human_gut
+HSM7J4MY PRJNA398089 SRR5949208 PRJNA398089 SRR5935910 human_gut
+HSM7J4QT PRJNA398089 SRR5949209 PRJNA398089 SRR5935844 human_gut
+CSM5MCX3 PRJNA398089 SRR5949210 PRJNA398089 SRR5935942 human_gut
+CSM7KOKD PRJNA398089 SRR5949211 PRJNA398089 SRR5950676 human_gut
+CSM7KOOH PRJNA398089 SRR5949212 PRJNA398089 SRR5950642 human_gut
+HSM7J4LD PRJNA398089 SRR5949213 PRJNA398089 SRR5935991 human_gut
+HSM7CYY9 PRJNA398089 SRR5949214 PRJNA398089 SRR5935990 human_gut
+CSM7KOJO PRJNA398089 SRR5949215 PRJNA398089 SRR5936062 human_gut
+MSM79HDQ PRJNA398089 SRR5949216 PRJNA398089 SRR5936063 human_gut
+MSM79HCP PRJNA398089 SRR5949217 PRJNA398089 SRR5936056 human_gut
+MSM79H9Q PRJNA398089 SRR5949218 PRJNA398089 SRR5936058 human_gut
+PSM7J1BF PRJNA398089 SRR5949219 PRJNA398089 SRR5935994 human_gut
+CSM7KON2 PRJNA398089 SRR5949220 PRJNA398089 SRR5935992 human_gut
+HSM7J4HS PRJNA398089 SRR5949221 PRJNA398089 SRR5935783 human_gut
+MSM79H7C PRJNA398089 SRR5949222 PRJNA398089 SRR5935741 human_gut
+HSM7J4LP PRJNA398089 SRR5949223 PRJNA398089 SRR5935743 human_gut
+PSM7J18E PRJNA398089 SRR5949224 PRJNA398089 SRR5935744 human_gut
+HSM7J4I5 PRJNA398089 SRR5949225 PRJNA398089 SRR5935745 human_gut
+PSM7J17L PRJNA398089 SRR5949226 PRJNA398089 SRR5936015 human_gut
+MSM79HDG PRJNA398089 SRR5949227 PRJNA398089 SRR5935750 human_gut
+PSM7J17X PRJNA398089 SRR5949228 PRJNA398089 SRR5935747 human_gut
+CSM67UAU PRJNA398089 SRR5949229 PRJNA398089 SRR5935807 human_gut
+HSM6XRQI PRJNA398089 SRR5949230 PRJNA398089 SRR5935805 human_gut
+CSM7KOLE PRJNA398089 SRR5949231 PRJNA398089 SRR5935967 human_gut
+PSM7J16U PRJNA398089 SRR5949232 PRJNA398089 SRR5935781 human_gut
+CSM79HHO PRJNA398089 SRR5949233 PRJNA398089 SRR5946885 human_gut
+HSM67VD6 PRJNA398089 SRR5949234 PRJNA398089 SRR5936073 human_gut
+CSM67UB3 PRJNA398089 SRR5949235 PRJNA398089 SRR5936068 human_gut
+MSM6J2LB PRJNA398089 SRR5949236 PRJNA398089 SRR5936069 human_gut
+MSM6J2RO PRJNA398089 SRR5949237 PRJNA398089 SRR5935810 human_gut
+CSM79HGP PRJNA398089 SRR5949238 PRJNA398089 SRR5935808 human_gut
+CSM67UDR PRJNA398089 SRR5949239 PRJNA398089 SRR5935814 human_gut
+HSM67VGA PRJNA398089 SRR5949240 PRJNA398089 SRR5935958 human_gut
+MSM6J2LV PRJNA398089 SRR5949241 PRJNA398089 SRR5935812 human_gut
+CSM67UBN PRJNA398089 SRR5949242 PRJNA398089 SRR5935811 human_gut
+HSM7J4NO PRJNA398089 SRR5949243 PRJNA398089 SRR5935875 human_gut
+HSM7J4KQ PRJNA398089 SRR5949244 PRJNA398089 SRR5936173 human_gut
+MSM9VZMA PRJNA398089 SRR5949245 PRJNA398089 SRR5935773 human_gut
+MSM9VZNH PRJNA398089 SRR5949246 PRJNA398089 SRR5935774 human_gut
+HSM7J4PI PRJNA398089 SRR5949247 PRJNA398089 SRR5936189 human_gut
+CSM79HRG PRJNA398089 SRR5949248 PRJNA398089 SRR5950737 human_gut
+HSM7J4HC PRJNA398089 SRR5949249 PRJNA398089 SRR5936025 human_gut
+PSM7J1A6 PRJNA398089 SRR5949250 PRJNA398089 SRR5936021 human_gut
+MSM79H9M PRJNA398089 SRR5949251 PRJNA398089 SRR5936020 human_gut
+MSM79HCI PRJNA398089 SRR5949252 PRJNA398089 SRR5936019 human_gut
+MSM79HDA PRJNA398089 SRR5949253 PRJNA398089 SRR5936026 human_gut
+PSM7J1BB PRJNA398089 SRR5949254 PRJNA398089 SRR5936024 human_gut
+PSM6XBUK PRJNA398089 SRR5949255 PRJNA398089 SRR5936023 human_gut
+HSM7J4HM PRJNA398089 SRR5949256 PRJNA398089 SRR5936022 human_gut
+MSM79H5Q PRJNA398089 SRR5949257 PRJNA398089 SRR5936112 human_gut
+HSM67VHJ PRJNA398089 SRR5949258 PRJNA398089 SRR5936114 human_gut
+PSM6XBT9 PRJNA398089 SRR5949259 PRJNA398089 SRR5935789 human_gut
+HSM67VGK PRJNA398089 SRR5949260 PRJNA398089 SRR5935788 human_gut
+CSM79HOL PRJNA398089 SRR5949261 PRJNA398089 SRR5936109 human_gut
+ESM5ME9U PRJNA398089 SRR5949262 PRJNA398089 SRR5936113 human_gut
+HSM7CZ1V PRJNA398089 SRR5949263 PRJNA398089 SRR5936110 human_gut
+CSM79HPK PRJNA398089 SRR5949264 PRJNA398089 SRR5936111 human_gut
+CSM79HPA PRJNA398089 SRR5949265 PRJNA398089 SRR5946615 human_gut
+PSM6XBV4 PRJNA398089 SRR5949266 PRJNA398089 SRR5935791 human_gut
+CSM7KORO PRJNA398089 SRR5949267 PRJNA398089 SRR5950523 human_gut
+CSM7KOPI PRJNA398089 SRR5949268 PRJNA398089 SRR5935974 human_gut
+HSM7J4J7 PRJNA398089 SRR5949269 PRJNA398089 SRR5935977 human_gut
+HSM7J4LH PRJNA398089 SRR5949270 PRJNA398089 SRR5935973 human_gut
+CSM7KOPU PRJNA398089 SRR5949271 PRJNA398089 SRR5950627 human_gut
+HSM7J4IO PRJNA398089 SRR5949272 PRJNA398089 SRR5935972 human_gut
+MSM79H81 PRJNA398089 SRR5949273 PRJNA398089 SRR5935971 human_gut
+MSM79H7O PRJNA398089 SRR5949274 PRJNA398089 SRR5935970 human_gut
+CSM7KOOV PRJNA398089 SRR5949275 PRJNA398089 SRR5950665 human_gut
+MSM79H9G PRJNA398089 SRR5949276 PRJNA398089 SRR5935968 human_gut
+HSM5MD7S PRJNA398089 SRR5949277 PRJNA398089 SRR5935936 human_gut
+MSM6J2JZ PRJNA398089 SRR5949278 PRJNA398089 SRR5935833 human_gut
+CSM5MCWQ PRJNA398089 SRR5949279 PRJNA398089 SRR5935887 human_gut
+HSM5MD7K PRJNA398089 SRR5949280 PRJNA398089 SRR5936086 human_gut
+MSM6J2HP PRJNA398089 SRR5949281 PRJNA398089 SRR5936084 human_gut
+CSM5MCW6 PRJNA398089 SRR5949282 PRJNA398089 SRR5936088 human_gut
+HSM5MD66 PRJNA398089 SRR5949283 PRJNA398089 SRR5936089 human_gut
+MSM6J2J5 PRJNA398089 SRR5949284 PRJNA398089 SRR5935829 human_gut
+MSM5LLDA PRJNA398089 SRR5949285 PRJNA398089 SRR5935828 human_gut
+MSM6J2JN PRJNA398089 SRR5949286 PRJNA398089 SRR5935827 human_gut
+HSM5MD4N PRJNA398089 SRR5949287 PRJNA398089 SRR5936235 human_gut
+CSM7KOL4 PRJNA398089 SRR5949288 PRJNA398089 SRR5935869 human_gut
+PSM7J1A2 PRJNA398089 SRR5949289 PRJNA398089 SRR5935868 human_gut
+HSM7CZ14 PRJNA398089 SRR5949290 PRJNA398089 SRR5936208 human_gut
+HSM67VIL PRJNA398089 SRR5949291 PRJNA398089 SRR5935871 human_gut
+HSM7J4PQ PRJNA398089 SRR5949292 PRJNA398089 SRR5935870 human_gut
+MSM79HC4 PRJNA398089 SRR5949293 PRJNA398089 SRR5935866 human_gut
+CSM7KOKR PRJNA398089 SRR5949294 PRJNA398089 SRR5946854 human_gut
+ESM7F5C5 PRJNA398089 SRR5949295 PRJNA398089 SRR5935867 human_gut
+CSM7KOKJ PRJNA398089 SRR5949296 PRJNA398089 SRR5947013 human_gut
+CSM7KOJU PRJNA398089 SRR5949297 PRJNA398089 SRR5946674 human_gut
+HSM67VCZ PRJNA398089 SRR5949298 PRJNA398089 SRR5935760 human_gut
+MSM6J2R2 PRJNA398089 SRR5949299 PRJNA398089 SRR5935759 human_gut
+CSM67U9D PRJNA398089 SRR5949300 PRJNA398089 SRR5935754 human_gut
+MSM6J2QR PRJNA398089 SRR5949301 PRJNA398089 SRR5935753 human_gut
+CSM67UBZ PRJNA398089 SRR5949302 PRJNA398089 SRR5936012 human_gut
+HSM5MD7U PRJNA398089 SRR5949303 PRJNA398089 SRR5936017 human_gut
+ESM718V8 PRJNA398089 SRR5949304 PRJNA398089 SRR5935757 human_gut
+HSM6XRSG PRJNA398089 SRR5949305 PRJNA398089 SRR5935752 human_gut
+HSM67VEO PRJNA398089 SRR5949306 PRJNA398089 SRR5936014 human_gut
+HSM6XRQW PRJNA398089 SRR5949307 PRJNA398089 SRR5936008 human_gut
+MSM79H7Q PRJNA398089 SRR5949308 PRJNA398089 SRR5936095 human_gut
+CSM67UAK PRJNA398089 SRR5949309 PRJNA398089 SRR5935931 human_gut
+CSM5MCZ3 PRJNA398089 SRR5949310 PRJNA398089 SRR5936239 human_gut
+CSM7KORC PRJNA398089 SRR5949311 PRJNA398089 SRR5950734 human_gut
+HSM6XRR7 PRJNA398089 SRR5949312 PRJNA398089 SRR5935935 human_gut
+CSM5MCXR PRJNA398089 SRR5949313 PRJNA398089 SRR5935933 human_gut
+MSM6J2IO PRJNA398089 SRR5949314 PRJNA398089 SRR5935932 human_gut
+ESM5MEDF PRJNA398089 SRR5949315 PRJNA398089 SRR5935934 human_gut
+MSM6J2KM PRJNA398089 SRR5949316 PRJNA398089 SRR5936236 human_gut
+HSM7J4M8 PRJNA398089 SRR5949317 PRJNA398089 SRR5936094 human_gut
+HSM6XRS4 PRJNA398089 SRR5949318 PRJNA398089 SRR5935905 human_gut
+HSM6XRQK PRJNA398089 SRR5949319 PRJNA398089 SRR5935906 human_gut
+CSM7KOR2 PRJNA398089 SRR5949320 PRJNA398089 SRR5950790 human_gut
+CSM67UGO PRJNA398089 SRR5949321 PRJNA398089 SRR5936075 human_gut
+ESM718TK PRJNA398089 SRR5949322 PRJNA398089 SRR5936077 human_gut
+MSM6J2MB PRJNA398089 SRR5949323 PRJNA398089 SRR5936076 human_gut
+HSM67VF9 PRJNA398089 SRR5949324 PRJNA398089 SRR5936071 human_gut
+CSM79HJM PRJNA398089 SRR5949325 PRJNA398089 SRR5946750 human_gut
+ESM5MECL PRJNA398089 SRR5949326 PRJNA398089 SRR5936070 human_gut
+CSM79HGF PRJNA398089 SRR5949327 PRJNA398089 SRR5935959 human_gut
+MSM6J2R8 PRJNA398089 SRR5949328 PRJNA398089 SRR5936072 human_gut
+MSM6J2HR PRJNA398089 SRR5949329 PRJNA398089 SRR5935916 human_gut
+HSM7J4QF PRJNA398089 SRR5949330 PRJNA398089 SRR5936196 human_gut
+MSM79HFY PRJNA398089 SRR5949331 PRJNA398089 SRR5936195 human_gut
+HSM7J4N4 PRJNA398089 SRR5949332 PRJNA398089 SRR5936028 human_gut
+MSM79H7Y PRJNA398089 SRR5949333 PRJNA398089 SRR5936099 human_gut
+CSM7KOOD PRJNA398089 SRR5949334 PRJNA398089 SRR5950527 human_gut
+MSM9VZGO PRJNA398089 SRR5949335 PRJNA398089 SRR5936105 human_gut
+HSM7J4JN PRJNA398089 SRR5949336 PRJNA398089 SRR5936102 human_gut
+PSM7J186 PRJNA398089 SRR5949337 PRJNA398089 SRR5936103 human_gut
+PSM7J177 PRJNA398089 SRR5949338 PRJNA398089 SRR5936187 human_gut
+MSM79HC8 PRJNA398089 SRR5949339 PRJNA398089 SRR5936191 human_gut
+PSM7J1AU PRJNA398089 SRR5949340 PRJNA398089 SRR5936194 human_gut
+HSM7CZ38 PRJNA398089 SRR5949341 PRJNA398089 SRR5936188 human_gut
+CSM7KOKN PRJNA398089 SRR5949342 PRJNA398089 SRR5950725 human_gut
+MSM79HCK PRJNA398089 SRR5949343 PRJNA398089 SRR5936198 human_gut
+PSM7J1CC PRJNA398089 SRR5949344 PRJNA398089 SRR5936193 human_gut
+HSM7J4I3 PRJNA398089 SRR5949345 PRJNA398089 SRR5936192 human_gut
+PSM6XBV2 PRJNA398089 SRR5949346 PRJNA398089 SRR5935982 human_gut
+HSM6XRV6 PRJNA398089 SRR5949347 PRJNA398089 SRR5935983 human_gut
+CSM79HLE PRJNA398089 SRR5949348 PRJNA398089 SRR5935826 human_gut
+CSM79HIL PRJNA398089 SRR5949349 PRJNA398089 SRR5946873 human_gut
+MSM6J2MJ PRJNA398089 SRR5949350 PRJNA398089 SRR5935832 human_gut
+PSM6XBSU PRJNA398089 SRR5949351 PRJNA398089 SRR5936106 human_gut
+CSM79HJA PRJNA398089 SRR5949352 PRJNA398089 SRR5946871 human_gut
+HSM67VG8 PRJNA398089 SRR5949353 PRJNA398089 SRR5936104 human_gut
+CSM79HIX PRJNA398089 SRR5949354 PRJNA398089 SRR5936108 human_gut
+HSM7CYX4 PRJNA398089 SRR5949355 PRJNA398089 SRR5936107 human_gut
+HSM7J4JZ PRJNA398089 SRR5949356 PRJNA398089 SRR5935839 human_gut
+PSM7J182 PRJNA398089 SRR5949357 PRJNA398089 SRR5935840 human_gut
+PSM7J1A8 PRJNA398089 SRR5949358 PRJNA398089 SRR5935801 human_gut
+MSM79HDC PRJNA398089 SRR5949359 PRJNA398089 SRR5935800 human_gut
+MSM79HEY PRJNA398089 SRR5949360 PRJNA398089 SRR5935795 human_gut
+PSM7J19J PRJNA398089 SRR5949361 PRJNA398089 SRR5935806 human_gut
+HSM7CYYD PRJNA398089 SRR5949362 PRJNA398089 SRR5935802 human_gut
+CSM79HR6 PRJNA398089 SRR5949363 PRJNA398089 SRR5950716 human_gut
+MSM79HAJ PRJNA398089 SRR5949364 PRJNA398089 SRR5935799 human_gut
+HSM7J4G1 PRJNA398089 SRR5949365 PRJNA398089 SRR5935797 human_gut
+CSM7KOKB PRJNA398089 SRR5949366 PRJNA398089 SRR5950714 human_gut
+CSM7KOMP PRJNA398089 SRR5949367 PRJNA398089 SRR5935796 human_gut
+HSM5MD43 PRJNA398089 SRR5949368 PRJNA398089 SRR5935969 human_gut
+MSM6J2KC PRJNA398089 SRR5949369 PRJNA398089 SRR5936085 human_gut
+ESM5MEDN PRJNA398089 SRR5949370 PRJNA398089 SRR5936184 human_gut
+CSM5MCVN PRJNA398089 SRR5949371 PRJNA398089 SRR5935841 human_gut
+HSM5MD62 PRJNA398089 SRR5949372 PRJNA398089 SRR5936183 human_gut
+HSM5MD5D PRJNA398089 SRR5949373 PRJNA398089 SRR5935966 human_gut
+MSM5LLF4 PRJNA398089 SRR5949374 PRJNA398089 SRR5936079 human_gut
+ESM5MEBU PRJNA398089 SRR5949375 PRJNA398089 SRR5936083 human_gut
+CSM5MCWC PRJNA398089 SRR5949376 PRJNA398089 SRR5936186 human_gut
+CSM5MCXJ PRJNA398089 SRR5949377 PRJNA398089 SRR5936080 human_gut
+ESM7F5CB PRJNA398089 SRR5949378 PRJNA398089 SRR5936203 human_gut
+PSM7J19N PRJNA398089 SRR5949379 PRJNA398089 SRR5936204 human_gut
+CSM7KOK5 PRJNA398089 SRR5949380 PRJNA398089 SRR5936205 human_gut
+CSM7KOJG PRJNA398089 SRR5949381 PRJNA398089 SRR5936207 human_gut
+PSM7J19P PRJNA398089 SRR5949382 PRJNA398089 SRR5936201 human_gut
+CSM79HR8 PRJNA398089 SRR5949383 PRJNA398089 SRR5946700 human_gut
+HSM7CZ3C PRJNA398089 SRR5949384 PRJNA398089 SRR5936206 human_gut
+HSM7CZ32 PRJNA398089 SRR5949385 PRJNA398089 SRR5936200 human_gut
+HSM7CYXA PRJNA398089 SRR5949386 PRJNA398089 SRR5936209 human_gut
+PSM6XBUI PRJNA398089 SRR5949387 PRJNA398089 SRR5936202 human_gut
+CSM67UAY PRJNA398089 SRR5949388 PRJNA398089 SRR5936011 human_gut
+HSM5MD6W PRJNA398089 SRR5949389 PRJNA398089 SRR5936010 human_gut
+CSM67UB9 PRJNA398089 SRR5949390 PRJNA398089 SRR5962905 human_gut
+CSM67UDJ PRJNA398089 SRR5949391 PRJNA398089 SRR5936009 human_gut
+HSM67VD2 PRJNA398089 SRR5949392 PRJNA398089 SRR5936006 human_gut
+CSM67UAO PRJNA398089 SRR5949393 PRJNA398089 SRR5946754 human_gut
+HSM6XRSI PRJNA398089 SRR5949394 PRJNA398089 SRR5935997 human_gut
+HSM6XRST PRJNA398089 SRR5949395 PRJNA398089 SRR5936001 human_gut
+HSM6XRQM PRJNA398089 SRR5949396 PRJNA398089 SRR5935998 human_gut
+CSM67UE7 PRJNA398089 SRR5949397 PRJNA398089 SRR5935999 human_gut
+HSM7J4IP PRJNA398089 SRR5949398 PRJNA398089 SRR5935949 human_gut
+PSM7J193 PRJNA398089 SRR5949399 PRJNA398089 SRR5936093 human_gut
+HSM5MD7O PRJNA398089 SRR5949400 PRJNA398089 SRR5935941 human_gut
+HSM5MD3Y PRJNA398089 SRR5949401 PRJNA398089 SRR5935940 human_gut
+HSM5MD6A PRJNA398089 SRR5949402 PRJNA398089 SRR5935939 human_gut
+HSM6XRQB PRJNA398089 SRR5949403 PRJNA398089 SRR5935765 human_gut
+CSM7KOO9 PRJNA398089 SRR5949404 PRJNA398089 SRR5950486 human_gut
+CSM5MCZB PRJNA398089 SRR5949405 PRJNA398089 SRR5935945 human_gut
+CSM5MCXN PRJNA398089 SRR5949406 PRJNA398089 SRR5935909 human_gut
+ESM5GEXY PRJNA398089 SRR5949407 PRJNA398089 SRR5935911 human_gut
+HSM6XRT8 PRJNA398089 SRR5949408 PRJNA398089 SRR5935915 human_gut
+HSM6XRSX PRJNA398089 SRR5949409 PRJNA398089 SRR5935769 human_gut
+CSM7KOPK PRJNA398089 SRR5949410 PRJNA398089 SRR5950484 human_gut
+HSM7J4MK PRJNA398089 SRR5949411 PRJNA398089 SRR5935948 human_gut
+HSM5MD5P PRJNA398089 SRR5949412 PRJNA398089 SRR5935918 human_gut
+PSM7J18I PRJNA398089 SRR5949413 PRJNA398089 SRR5936101 human_gut
+CSM79HID PRJNA398089 SRR5949414 PRJNA398089 SRR5936030 human_gut
+HSM6XRUR PRJNA398089 SRR5949415 PRJNA398089 SRR5936033 human_gut
+HSM67VHB PRJNA398089 SRR5949416 PRJNA398089 SRR5936029 human_gut
+CSM79HJW PRJNA398089 SRR5949417 PRJNA398089 SRR5936218 human_gut
+HSM67VF3 PRJNA398089 SRR5949418 PRJNA398089 SRR5936041 human_gut
+HSM67VEU PRJNA398089 SRR5949419 PRJNA398089 SRR5936037 human_gut
+HSM67VFZ PRJNA398089 SRR5949420 PRJNA398089 SRR5936031 human_gut
+CSM79HH4 PRJNA398089 SRR5949421 PRJNA398089 SRR5947037 human_gut
+MSM9VZEU PRJNA398089 SRR5949422 PRJNA398089 SRR5936092 human_gut
+CSM79HIF PRJNA398089 SRR5949423 PRJNA398089 SRR5946749 human_gut
+CSM79HI3 PRJNA398089 SRR5949424 PRJNA398089 SRR5936222 human_gut
+HSM7J4JJ PRJNA398089 SRR5949425 PRJNA398089 SRR5936091 human_gut
+MSM79H87 PRJNA398089 SRR5949426 PRJNA398089 SRR5935872 human_gut
+MSM79HB6 PRJNA398089 SRR5949427 PRJNA398089 SRR5935850 human_gut
+HSM7J4LN PRJNA398089 SRR5949428 PRJNA398089 SRR5935851 human_gut
+PSM7J17F PRJNA398089 SRR5949429 PRJNA398089 SRR5936096 human_gut
+MSM6J2K2 PRJNA398089 SRR5949430 PRJNA398089 SRR5935914 human_gut
+HSM7J4NE PRJNA398089 SRR5949431 PRJNA398089 SRR5936174 human_gut
+PSM7J18K PRJNA398089 SRR5949432 PRJNA398089 SRR5935861 human_gut
+PSM7J136 PRJNA398089 SRR5949433 PRJNA398089 SRR5936172 human_gut
+CSM7KOPO PRJNA398089 SRR5949434 PRJNA398089 SRR5950771 human_gut
+CSM7KORU PRJNA398089 SRR5949435 PRJNA398089 SRR5950772 human_gut
+MSM79H83 PRJNA398089 SRR5949436 PRJNA398089 SRR5936098 human_gut
+CSM79HPS PRJNA398089 SRR5949437 PRJNA398089 SRR5935865 human_gut
+PSM7J1AM PRJNA398089 SRR5949438 PRJNA398089 SRR5935864 human_gut
+ESM718T7 PRJNA398089 SRR5949439 PRJNA398089 SRR5935886 human_gut
+HSM6XRVW PRJNA398089 SRR5949440 PRJNA398089 SRR5935874 human_gut
+HSM7J4PS PRJNA398089 SRR5949441 PRJNA398089 SRR5935873 human_gut
+HSM7CYXC PRJNA398089 SRR5949442 PRJNA398089 SRR5935930 human_gut
+MSM79HEU PRJNA398089 SRR5949443 PRJNA398089 SRR5936081 human_gut
+CSM79HR2 PRJNA398089 SRR5949444 PRJNA398089 SRR5946648 human_gut
+HSM7CZ2Z PRJNA398089 SRR5949445 PRJNA398089 SRR5936128 human_gut
+CSM79HN6 PRJNA398089 SRR5949446 PRJNA398089 SRR5946941 human_gut
+HSM67VFJ PRJNA398089 SRR5949447 PRJNA398089 SRR5936129 human_gut
+HSM6XRVK PRJNA398089 SRR5949448 PRJNA398089 SRR5936130 human_gut
+CSM79HMT PRJNA398089 SRR5949449 PRJNA398089 SRR5936185 human_gut
+CSM79HPC PRJNA398089 SRR5949450 PRJNA398089 SRR5947074 human_gut
+HSM7CYXI PRJNA398089 SRR5949451 PRJNA398089 SRR5936141 human_gut
+HSM7CYYH PRJNA398089 SRR5949452 PRJNA398089 SRR5936127 human_gut
+HSM7J4PO PRJNA398089 SRR5949453 PRJNA398089 SRR5935885 human_gut
+HSM67VGY PRJNA398089 SRR5949454 PRJNA398089 SRR5935883 human_gut
+CSM7KOO5 PRJNA398089 SRR5949455 PRJNA398089 SRR5950636 human_gut
+HSM7J4OZ PRJNA398089 SRR5949456 PRJNA398089 SRR5935779 human_gut
+MSM9VZF1 PRJNA398089 SRR5949457 PRJNA398089 SRR5935777 human_gut
+CSM7KOSP PRJNA398089 SRR5949458 PRJNA398089 SRR5950565 human_gut
+MSM9VZMO PRJNA398089 SRR5949459 PRJNA398089 SRR5935778 human_gut
+PSM7J18Q PRJNA398089 SRR5949460 PRJNA398089 SRR5935775 human_gut
+HSM7J4K6 PRJNA398089 SRR5949461 PRJNA398089 SRR5935776 human_gut
+HSM7J4O3 PRJNA398089 SRR5949462 PRJNA398089 SRR5935772 human_gut
+CSM7KOU9 PRJNA398089 SRR5949463 PRJNA398089 SRR5950609 human_gut
+HSM5MD48 PRJNA398089 SRR5949464 PRJNA398089 SRR5935853 human_gut
+MSM5LLDC PRJNA398089 SRR5949465 PRJNA398089 SRR5935854 human_gut
+MSM5LLEP PRJNA398089 SRR5949466 PRJNA398089 SRR5935855 human_gut
+MSM6J2HB PRJNA398089 SRR5949467 PRJNA398089 SRR5935856 human_gut
+CSM5MCY4 PRJNA398089 SRR5949468 PRJNA398089 SRR5936178 human_gut
+MSM6J2J3 PRJNA398089 SRR5949469 PRJNA398089 SRR5936177 human_gut
+MSM6J2IQ PRJNA398089 SRR5949470 PRJNA398089 SRR5935862 human_gut
+CSM5MCXH PRJNA398089 SRR5949471 PRJNA398089 SRR5935863 human_gut
+MSM5LLHC PRJNA398089 SRR5949472 PRJNA398089 SRR5936181 human_gut
+CSM5MCXT PRJNA398089 SRR5949473 PRJNA398089 SRR5936179 human_gut
+HSM67VEE PRJNA398089 SRR5949474 PRJNA398089 SRR5936155 human_gut
+HSM6XRRB PRJNA398089 SRR5949475 PRJNA398089 SRR5936161 human_gut
+CSM5MCY8 PRJNA398089 SRR5949476 PRJNA398089 SRR5935919 human_gut
+CSM67UCK PRJNA398089 SRR5949477 PRJNA398089 SRR5936002 human_gut
+HSM6XRRJ PRJNA398089 SRR5949478 PRJNA398089 SRR5936000 human_gut
+HSM67VEC PRJNA398089 SRR5949479 PRJNA398089 SRR5936004 human_gut
+HSM6XRRV PRJNA398089 SRR5949480 PRJNA398089 SRR5936003 human_gut
+MSM6J2QH PRJNA398089 SRR5949481 PRJNA398089 SRR5935995 human_gut
+HSM6XRUL PRJNA398089 SRR5949482 PRJNA398089 SRR5936156 human_gut
+CSM67UDN PRJNA398089 SRR5949483 PRJNA398089 SRR5936157 human_gut
+CSM67UFZ PRJNA398089 SRR5949484 PRJNA398089 SRR5936158 human_gut
+CSM5MCXL PRJNA398089 SRR5949485 PRJNA398089 SRR5935830 human_gut
+CSM5MCWE PRJNA398089 SRR5949486 PRJNA398089 SRR5935831 human_gut
+HSM5MD6Y PRJNA398089 SRR5949487 PRJNA398089 SRR5935834 human_gut
+MSM6J2RC PRJNA398089 SRR5949488 PRJNA398089 SRR5935986 human_gut
+MSM79H54 PRJNA398089 SRR5949489 PRJNA398089 SRR5935984 human_gut
+CSM79HKT PRJNA398089 SRR5949490 PRJNA398089 SRR5935980 human_gut
+CSM79HO1 PRJNA398089 SRR5949491 PRJNA398089 SRR5935979 human_gut
+MSM6J2SK PRJNA398089 SRR5949492 PRJNA398089 SRR5935985 human_gut
+MSM79H5G PRJNA398089 SRR5949493 PRJNA398089 SRR5935981 human_gut
+CSM79HNO PRJNA398089 SRR5949494 PRJNA398089 SRR5946940 human_gut
+CSM79HJS PRJNA398089 SRR5949495 PRJNA398089 SRR5946694 human_gut
+ESM718TF PRJNA398089 SRR5949496 PRJNA398089 SRR5935896 human_gut
+HSM7CYX2 PRJNA398089 SRR5949497 PRJNA398089 SRR5935897 human_gut
+HSM5MD6I PRJNA398089 SRR5949498 PRJNA398089 SRR5935770 human_gut
+MSM6J2KE PRJNA398089 SRR5949499 PRJNA398089 SRR5935913 human_gut
+MSM6J2JP PRJNA398089 SRR5949500 PRJNA398089 SRR5935912 human_gut
+HSM7J4MA PRJNA398089 SRR5949501 PRJNA398089 SRR5936171 human_gut
+CSM7KOSL PRJNA398089 SRR5949502 PRJNA398089 SRR5950776 human_gut
+MSM9VZFL PRJNA398089 SRR5949503 PRJNA398089 SRR5936168 human_gut
+MSM9VZEW PRJNA398089 SRR5949504 PRJNA398089 SRR5936167 human_gut
+HSM7J4IR PRJNA398089 SRR5949505 PRJNA398089 SRR5936170 human_gut
+PSM7J184 PRJNA398089 SRR5949506 PRJNA398089 SRR5936169 human_gut
+CSM7KOPM PRJNA398089 SRR5949507 PRJNA398089 SRR5950504 human_gut
+HSM7J4KC PRJNA398089 SRR5949508 PRJNA398089 SRR5936165 human_gut
+CSM7KOSX PRJNA398089 SRR5949509 PRJNA398089 SRR5950775 human_gut
+PSM7J1CU PRJNA398089 SRR5949510 PRJNA398089 SRR5936164 human_gut
+CSM7KOPW PRJNA398089 SRR5949511 PRJNA398089 SRR5950485 human_gut
+MSM79H9W PRJNA398089 SRR5949512 PRJNA398089 SRR5936097 human_gut
+PSM7J179 PRJNA398089 SRR5949513 PRJNA398089 SRR5936055 human_gut
+CSM7KOMB PRJNA398089 SRR5949514 PRJNA398089 SRR5950681 human_gut
+HSM7J4GD PRJNA398089 SRR5949515 PRJNA398089 SRR5936060 human_gut
+HSM7J4HO PRJNA398089 SRR5949516 PRJNA398089 SRR5936059 human_gut
+PSM6XBUQ PRJNA398089 SRR5949517 PRJNA398089 SRR5935803 human_gut
+CSM7KOLA PRJNA398089 SRR5949518 PRJNA398089 SRR5935804 human_gut
+CSM7KOLM PRJNA398089 SRR5949519 PRJNA398089 SRR5950680 human_gut
+HSM7J4PW PRJNA398089 SRR5949520 PRJNA398089 SRR5936054 human_gut
+HSM7J4R2 PRJNA398089 SRR5949521 PRJNA398089 SRR5936065 human_gut
+HSM7J4PK PRJNA398089 SRR5949522 PRJNA398089 SRR5936061 human_gut
+CSM7KOPS PRJNA398089 SRR5949523 PRJNA398089 SRR5950674 human_gut
+CSM7KOMT PRJNA398089 SRR5949524 PRJNA398089 SRR5935748 human_gut
+CSM7KOKZ PRJNA398089 SRR5949525 PRJNA398089 SRR5950635 human_gut
+PSM7J1BR PRJNA398089 SRR5949526 PRJNA398089 SRR5936139 human_gut
+MSM79HCR PRJNA398089 SRR5949527 PRJNA398089 SRR5936137 human_gut
+HSM7CYYB PRJNA398089 SRR5949528 PRJNA398089 SRR5936138 human_gut
+HSM7J4PM PRJNA398089 SRR5949529 PRJNA398089 SRR5936135 human_gut
+PSM7J17B PRJNA398089 SRR5949530 PRJNA398089 SRR5936136 human_gut
+HSM7J4QJ PRJNA398089 SRR5949531 PRJNA398089 SRR5936175 human_gut
+HSM7J4PY PRJNA398089 SRR5949532 PRJNA398089 SRR5936133 human_gut
+HSM67VGC PRJNA398089 SRR5949533 PRJNA398089 SRR5936213 human_gut
+CSM79HIJ PRJNA398089 SRR5949534 PRJNA398089 SRR5946657 human_gut
+MSM6J2QD PRJNA398089 SRR5949535 PRJNA398089 SRR5935746 human_gut
+CSM67UEI PRJNA398089 SRR5949536 PRJNA398089 SRR5935742 human_gut
+HSM67VEM PRJNA398089 SRR5949537 PRJNA398089 SRR5936125 human_gut
+MSM79H5E PRJNA398089 SRR5949538 PRJNA398089 SRR5935740 human_gut
+HSM67VIB PRJNA398089 SRR5949539 PRJNA398089 SRR5936126 human_gut
+HSM6XRVO PRJNA398089 SRR5949540 PRJNA398089 SRR5936122 human_gut
+HSM67VHF PRJNA398089 SRR5949541 PRJNA398089 SRR5936120 human_gut
+HSM7CYZ7 PRJNA398089 SRR5949542 PRJNA398089 SRR5936124 human_gut
+HSM6XRV4 PRJNA398089 SRR5949543 PRJNA398089 SRR5936123 human_gut
+CSM79HRC PRJNA398089 SRR5949544 PRJNA398089 SRR5946784 human_gut
+CSM79HHU PRJNA398089 SRR5949545 PRJNA398089 SRR5946653 human_gut
+CSM79HOZ PRJNA398089 SRR5949546 PRJNA398089 SRR5946946 human_gut
+MSM6J2OP PRJNA398089 SRR5949547 PRJNA398089 SRR5936053 human_gut
+CSM79HH8 PRJNA398089 SRR5949548 PRJNA398089 SRR5946908 human_gut
+CSM79HGX PRJNA398089 SRR5949549 PRJNA398089 SRR5946740 human_gut
+PSM6XBVK PRJNA398089 SRR5949550 PRJNA398089 SRR5936047 human_gut
+ESM7F5AK PRJNA398089 SRR5949551 PRJNA398089 SRR5936050 human_gut
+CSM79HIT PRJNA398089 SRR5949552 PRJNA398089 SRR5936046 human_gut
+HSM67VFD PRJNA398089 SRR5949553 PRJNA398089 SRR5936044 human_gut
+HSM7CYZT PRJNA398089 SRR5949554 PRJNA398089 SRR5936048 human_gut
+HSM7J4K4 PRJNA398089 SRR5949555 PRJNA398089 SRR5936078 human_gut
+HSM7J4MC PRJNA398089 SRR5949556 PRJNA398089 SRR5936067 human_gut
+MSM9VZMM PRJNA398089 SRR5949557 PRJNA398089 SRR5936197 human_gut
+MSM9VZEY PRJNA398089 SRR5949558 PRJNA398089 SRR5936199 human_gut
+MSM79HBB PRJNA398089 SRR5949559 PRJNA398089 SRR5935825 human_gut
+CSM7KOOR PRJNA398089 SRR5949560 PRJNA398089 SRR5950530 human_gut
+HSM7J4K2 PRJNA398089 SRR5949561 PRJNA398089 SRR5935852 human_gut
+HSM7J4JD PRJNA398089 SRR5949562 PRJNA398089 SRR5935822 human_gut
+CSM7KOTC PRJNA398089 SRR5949563 PRJNA398089 SRR5950564 human_gut
+HSM7J4IS PRJNA398089 SRR5949564 PRJNA398089 SRR5936090 human_gut
+PSM7J18M PRJNA398089 SRR5949565 PRJNA398089 SRR5936147 human_gut
+MSM79H9Y PRJNA398089 SRR5949566 PRJNA398089 SRR5936115 human_gut
+PSM7J19F PRJNA398089 SRR5949567 PRJNA398089 SRR5936116 human_gut
+MSM79HF5 PRJNA398089 SRR5949568 PRJNA398089 SRR5935884 human_gut
+PSM7J17V PRJNA398089 SRR5949569 PRJNA398089 SRR5935818 human_gut
+HSM7J4MS PRJNA398089 SRR5949570 PRJNA398089 SRR5935819 human_gut
+CSM7KONU PRJNA398089 SRR5949571 PRJNA398089 SRR5936190 human_gut
+MSM79HAL PRJNA398089 SRR5949572 PRJNA398089 SRR5936140 human_gut
+HSM7J4HQ PRJNA398089 SRR5949573 PRJNA398089 SRR5935987 human_gut
+MSM79HDE PRJNA398089 SRR5949574 PRJNA398089 SRR5935988 human_gut
+MSM79HA3 PRJNA398089 SRR5949575 PRJNA398089 SRR5935989 human_gut
+HSM7J4GR PRJNA398089 SRR5949576 PRJNA398089 SRR5935993 human_gut
+CSM79HNU PRJNA398089 SRR5949577 PRJNA398089 SRR5946826 human_gut
+HSM7CYX8 PRJNA398089 SRR5949578 PRJNA398089 SRR5935880 human_gut
+MSM79H5Y PRJNA398089 SRR5949579 PRJNA398089 SRR5935876 human_gut
+PSM7J19B PRJNA398089 SRR5949580 PRJNA398089 SRR5935877 human_gut
+HSM67VH1 PRJNA398089 SRR5949581 PRJNA398089 SRR5936142 human_gut
+HSM7CZ1Z PRJNA398089 SRR5949582 PRJNA398089 SRR5935881 human_gut
+HSM7CZ3A PRJNA398089 SRR5949583 PRJNA398089 SRR5935882 human_gut
+PSM6XBUG PRJNA398089 SRR5949584 PRJNA398089 SRR5935879 human_gut
+HSM7J4NY PRJNA398089 SRR5949585 PRJNA398089 SRR5950717 human_gut
+MSM79H7M PRJNA398089 SRR5949586 PRJNA398089 SRR5935782 human_gut
+HSM7J4ME PRJNA398089 SRR5949587 PRJNA398089 SRR5936027 human_gut
+HSM6XRR5 PRJNA398089 SRR5949588 PRJNA398089 SRR5936042 human_gut
+MSM6J2JD PRJNA398089 SRR5949589 PRJNA398089 SRR5936040 human_gut
+HSM5MD41 PRJNA398089 SRR5949590 PRJNA398089 SRR5936039 human_gut
+MSM6J2LJ PRJNA398089 SRR5949591 PRJNA398089 SRR5936038 human_gut
+HSM6XRSN PRJNA398089 SRR5949592 PRJNA398089 SRR5936045 human_gut
+CSM5MCZF PRJNA398089 SRR5949593 PRJNA398089 SRR5936166 human_gut
+MSM6J2JR PRJNA398089 SRR5949594 PRJNA398089 SRR5935784 human_gut
+ESM5MED2 PRJNA398089 SRR5949595 PRJNA398089 SRR5936049 human_gut
+MSM6J2HT PRJNA398089 SRR5949596 PRJNA398089 SRR5935749 human_gut
+HSM5MD73 PRJNA398089 SRR5949597 PRJNA398089 SRR5936220 human_gut
+CSM5MCXP PRJNA398089 SRR5949598 PRJNA398089 SRR5936224 human_gut
+CSM67UA2 PRJNA398089 SRR5949599 PRJNA398089 SRR5935780 human_gut
+ESM5MEB7 PRJNA398089 SRR5949600 PRJNA398089 SRR5935947 human_gut
+CSM67U9H PRJNA398089 SRR5949601 PRJNA398089 SRR5935767 human_gut
+CSM5MCZD PRJNA398089 SRR5949602 PRJNA398089 SRR5935946 human_gut
+HSM5MD6K PRJNA398089 SRR5949603 PRJNA398089 SRR5935764 human_gut
+HSM5MD7Q PRJNA398089 SRR5949604 PRJNA398089 SRR5936223 human_gut
+CSM5MCY2 PRJNA398089 SRR5949605 PRJNA398089 SRR5936163 human_gut
+MSM6J2SE PRJNA398089 SRR5949606 PRJNA398089 SRR5936035 human_gut
+MSM6J2PK PRJNA398089 SRR5949607 PRJNA398089 SRR5936036 human_gut
+PSM6XBSE PRJNA398089 SRR5949608 PRJNA398089 SRR5935961 human_gut
+MSM9VZOU PRJNA398089 SRR5950246 PRJNA398089 SRR5947080 human_gut
+HSMA33O3 PRJNA398089 SRR5950247 PRJNA398089 SRR5947083 human_gut
+PSM7J15S PRJNA398089 SRR5950248 PRJNA398089 SRR5947077 human_gut
+HSMA33OR PRJNA398089 SRR5950249 PRJNA398089 SRR5946855 human_gut
+PSM7J163 PRJNA398089 SRR5950250 PRJNA398089 SRR5947079 human_gut
+PSM7J14R PRJNA398089 SRR5950251 PRJNA398089 SRR5947078 human_gut
+PSM7J14T PRJNA398089 SRR5950252 PRJNA398089 SRR5947087 human_gut
+MSM9VZJZ PRJNA398089 SRR5950253 PRJNA398089 SRR5947081 human_gut
+MSMB4LYH PRJNA398089 SRR5950254 PRJNA398089 SRR5946709 human_gut
+MSM5LLDI PRJNA398089 SRR5950255 PRJNA398089 SRR5946771 human_gut
+MSM5LLDK PRJNA398089 SRR5950256 PRJNA398089 SRR5946775 human_gut
+PSMB4MC5 PRJNA398089 SRR5950257 PRJNA398089 SRR5946710 human_gut
+CSM9X22S PRJNA398089 SRR5950258 PRJNA398089 SRR5950514 human_gut
+PSMA267R PRJNA398089 SRR5950259 PRJNA398089 SRR5946765 human_gut
+HSMA33QY PRJNA398089 SRR5950260 PRJNA398089 SRR5946767 human_gut
+HSMA33PZ PRJNA398089 SRR5950261 PRJNA398089 SRR5946764 human_gut
+MSM5LLDU PRJNA398089 SRR5950262 PRJNA398089 SRR5946769 human_gut
+MSM5LLDM PRJNA398089 SRR5950263 PRJNA398089 SRR5946770 human_gut
+PSMA267H PRJNA398089 SRR5950264 PRJNA398089 SRR5946766 human_gut
+MSM5LLDQ PRJNA398089 SRR5950265 PRJNA398089 SRR5946768 human_gut
+PSM7J4EF PRJNA398089 SRR5950266 PRJNA398089 SRR5946734 human_gut
+HSMA33IA PRJNA398089 SRR5950267 PRJNA398089 SRR5946738 human_gut
+HSMA33P6 PRJNA398089 SRR5950268 PRJNA398089 SRR5946732 human_gut
+HSMA33OD PRJNA398089 SRR5950269 PRJNA398089 SRR5946736 human_gut
+MSM9VZKC PRJNA398089 SRR5950270 PRJNA398089 SRR5946731 human_gut
+CSMAIG7X PRJNA398089 SRR5950271 PRJNA398089 SRR5950539 human_gut
+HSMA33OT PRJNA398089 SRR5950272 PRJNA398089 SRR5947084 human_gut
+PSM7J15U PRJNA398089 SRR5950273 PRJNA398089 SRR5946735 human_gut
+CSM9X1YV PRJNA398089 SRR5950274 PRJNA398089 SRR5950587 human_gut
+MSM9VZMI PRJNA398089 SRR5950275 PRJNA398089 SRR5946737 human_gut
+PSMA267P PRJNA398089 SRR5950276 PRJNA398089 SRR5946763 human_gut
+HSMA33LJ PRJNA398089 SRR5950277 PRJNA398089 SRR5946717 human_gut
+CSM9X22K PRJNA398089 SRR5950278 PRJNA398089 SRR5950551 human_gut
+MSMA26BR PRJNA398089 SRR5950279 PRJNA398089 SRR5946723 human_gut
+MSM9VZI6 PRJNA398089 SRR5950280 PRJNA398089 SRR5946725 human_gut
+MSMA26AV PRJNA398089 SRR5950281 PRJNA398089 SRR5946722 human_gut
+PSMA269G PRJNA398089 SRR5950282 PRJNA398089 SRR5946727 human_gut
+PSMA269S PRJNA398089 SRR5950283 PRJNA398089 SRR5946724 human_gut
+HSMA33MK PRJNA398089 SRR5950284 PRJNA398089 SRR5946718 human_gut
+HSMA33LH PRJNA398089 SRR5950285 PRJNA398089 SRR5946726 human_gut
+HSMA33M8 PRJNA398089 SRR5950286 PRJNA398089 SRR5946894 human_gut
+HSMA33RF PRJNA398089 SRR5950287 PRJNA398089 SRR5946898 human_gut
+CSM9X23B PRJNA398089 SRR5950288 PRJNA398089 SRR5950553 human_gut
+MSMB4LXY PRJNA398089 SRR5950289 PRJNA398089 SRR5946639 human_gut
+PSMB4MBK PRJNA398089 SRR5950290 PRJNA398089 SRR5946635 human_gut
+MSMAPC64 PRJNA398089 SRR5950291 PRJNA398089 SRR5946637 human_gut
+CSM9X23N PRJNA398089 SRR5950292 PRJNA398089 SRR5950710 human_gut
+HSMA33RT PRJNA398089 SRR5950293 PRJNA398089 SRR5946636 human_gut
+MSMB4LXW PRJNA398089 SRR5950294 PRJNA398089 SRR5946632 human_gut
+MSMAPC5D PRJNA398089 SRR5950295 PRJNA398089 SRR5946630 human_gut
+CSM9X219 PRJNA398089 SRR5950296 PRJNA398089 SRR5950623 human_gut
+PSMA265X PRJNA398089 SRR5950297 PRJNA398089 SRR5946743 human_gut
+PSMA264S PRJNA398089 SRR5950298 PRJNA398089 SRR5947062 human_gut
+PSMA265N PRJNA398089 SRR5950299 PRJNA398089 SRR5946926 human_gut
+MSM9VZKE PRJNA398089 SRR5950300 PRJNA398089 SRR5946925 human_gut
+MSM9VZL5 PRJNA398089 SRR5950301 PRJNA398089 SRR5946924 human_gut
+HSMA33SK PRJNA398089 SRR5950302 PRJNA398089 SRR5946708 human_gut
+MSMB4LZ8 PRJNA398089 SRR5950303 PRJNA398089 SRR5946707 human_gut
+MSMAPC5L PRJNA398089 SRR5950304 PRJNA398089 SRR5946891 human_gut
+MSMAPC6G PRJNA398089 SRR5950305 PRJNA398089 SRR5946887 human_gut
+HSMA33RD PRJNA398089 SRR5950306 PRJNA398089 SRR5946888 human_gut
+CSM9X237 PRJNA398089 SRR5950307 PRJNA398089 SRR5950556 human_gut
+MSMAPC7T PRJNA398089 SRR5950308 PRJNA398089 SRR5946825 human_gut
+MSMAPC59 PRJNA398089 SRR5950309 PRJNA398089 SRR5946892 human_gut
+PSMA269O PRJNA398089 SRR5950310 PRJNA398089 SRR5946893 human_gut
+MSMAPC6K PRJNA398089 SRR5950311 PRJNA398089 SRR5946889 human_gut
+MSMB4LZR PRJNA398089 SRR5950312 PRJNA398089 SRR5946714 human_gut
+MSMB4LZC PRJNA398089 SRR5950313 PRJNA398089 SRR5946713 human_gut
+HSMA33SI PRJNA398089 SRR5950314 PRJNA398089 SRR5946712 human_gut
+MSMB4LZX PRJNA398089 SRR5950315 PRJNA398089 SRR5946711 human_gut
+HSMA33Q6 PRJNA398089 SRR5950316 PRJNA398089 SRR5946886 human_gut
+PSMA26A3 PRJNA398089 SRR5950317 PRJNA398089 SRR5946890 human_gut
+PSMB4MC7 PRJNA398089 SRR5950318 PRJNA398089 SRR5946633 human_gut
+PSMB4MBS PRJNA398089 SRR5950319 PRJNA398089 SRR5946634 human_gut
+HSMA33IC PRJNA398089 SRR5950320 PRJNA398089 SRR5946809 human_gut
+CSM9X1Z4 PRJNA398089 SRR5950321 PRJNA398089 SRR5950616 human_gut
+HSMA33SG PRJNA398089 SRR5950322 PRJNA398089 SRR5946640 human_gut
+HSMA33KE PRJNA398089 SRR5950323 PRJNA398089 SRR5946807 human_gut
+HSMA33JR PRJNA398089 SRR5950324 PRJNA398089 SRR5946678 human_gut
+PSMA2668 PRJNA398089 SRR5950325 PRJNA398089 SRR5946677 human_gut
+HSMA33J3 PRJNA398089 SRR5950326 PRJNA398089 SRR5946931 human_gut
+HSMA33IK PRJNA398089 SRR5950327 PRJNA398089 SRR5946932 human_gut
+HSMA33OZ PRJNA398089 SRR5950328 PRJNA398089 SRR5947016 human_gut
+MSMA26DM PRJNA398089 SRR5950329 PRJNA398089 SRR5947017 human_gut
+PSMA265L PRJNA398089 SRR5950330 PRJNA398089 SRR5946676 human_gut
+MSM9VZP3 PRJNA398089 SRR5950331 PRJNA398089 SRR5946679 human_gut
+CSM9X1Y3 PRJNA398089 SRR5950332 PRJNA398089 SRR5950580 human_gut
+MSM9VZJB PRJNA398089 SRR5950333 PRJNA398089 SRR5947020 human_gut
+MSMA267V PRJNA398089 SRR5950334 PRJNA398089 SRR5947018 human_gut
+MSM9VZKI PRJNA398089 SRR5950335 PRJNA398089 SRR5947019 human_gut
+CSMA9J65 PRJNA398089 SRR5950336 PRJNA398089 SRR5950645 human_gut
+MSM9VZLP PRJNA398089 SRR5950337 PRJNA398089 SRR5935978 human_gut
+HSM7J4K8 PRJNA398089 SRR5950338 PRJNA398089 SRR5936144 human_gut
+CSM7KOUL PRJNA398089 SRR5950339 PRJNA398089 SRR5950650 human_gut
+HSM7J4N6 PRJNA398089 SRR5950340 PRJNA398089 SRR5936145 human_gut
+PSM7J12R PRJNA398089 SRR5950341 PRJNA398089 SRR5936146 human_gut
+MSM9VZF3 PRJNA398089 SRR5950342 PRJNA398089 SRR5935923 human_gut
+PSM7J15W PRJNA398089 SRR5950343 PRJNA398089 SRR5935921 human_gut
+PSM7J14L PRJNA398089 SRR5950344 PRJNA398089 SRR5935920 human_gut
+MSM9VZFF PRJNA398089 SRR5950345 PRJNA398089 SRR5936143 human_gut
+MSM9VZOQ PRJNA398089 SRR5950346 PRJNA398089 SRR5946808 human_gut
+MSM9VZLT PRJNA398089 SRR5950347 PRJNA398089 SRR5946929 human_gut
+MSMA26BX PRJNA398089 SRR5950348 PRJNA398089 SRR5946834 human_gut
+MSMAPC7J PRJNA398089 SRR5950349 PRJNA398089 SRR5946831 human_gut
+MSMAPC6A PRJNA398089 SRR5950350 PRJNA398089 SRR5946833 human_gut
+PSMA269W PRJNA398089 SRR5950351 PRJNA398089 SRR5946835 human_gut
+MSMAPC7R PRJNA398089 SRR5950352 PRJNA398089 SRR5946829 human_gut
+CSM9X235 PRJNA398089 SRR5950353 PRJNA398089 SRR5950595 human_gut
+HSMA33M2 PRJNA398089 SRR5950354 PRJNA398089 SRR5946830 human_gut
+CSM9X22U PRJNA398089 SRR5950355 PRJNA398089 SRR5950591 human_gut
+HSMA33RX PRJNA398089 SRR5950356 PRJNA398089 SRR5946705 human_gut
+MSMB4LYB PRJNA398089 SRR5950357 PRJNA398089 SRR5946706 human_gut
+HSMA33R5 PRJNA398089 SRR5950358 PRJNA398089 SRR5946827 human_gut
+HSMA33QO PRJNA398089 SRR5950359 PRJNA398089 SRR5946828 human_gut
+MSMB4LZK PRJNA398089 SRR5950360 PRJNA398089 SRR5947057 human_gut
+MSM9VZIY PRJNA398089 SRR5950361 PRJNA398089 SRR5947023 human_gut
+MSM9VZHJ PRJNA398089 SRR5950362 PRJNA398089 SRR5947022 human_gut
+PSMA266I PRJNA398089 SRR5950363 PRJNA398089 SRR5946813 human_gut
+MSM9VZHB PRJNA398089 SRR5950364 PRJNA398089 SRR5946803 human_gut
+PSMA264U PRJNA398089 SRR5950365 PRJNA398089 SRR5946804 human_gut
+HSMA33JD PRJNA398089 SRR5950366 PRJNA398089 SRR5946805 human_gut
+MSM9VZP1 PRJNA398089 SRR5950367 PRJNA398089 SRR5947021 human_gut
+MSM9VZIM PRJNA398089 SRR5950368 PRJNA398089 SRR5947025 human_gut
+HSMA33J5 PRJNA398089 SRR5950369 PRJNA398089 SRR5947024 human_gut
+MSM9VZLB PRJNA398089 SRR5950370 PRJNA398089 SRR5946905 human_gut
+PSM7J15O PRJNA398089 SRR5950371 PRJNA398089 SRR5935836 human_gut
+PSM7J14N PRJNA398089 SRR5950372 PRJNA398089 SRR5935878 human_gut
+HSMA33NA PRJNA398089 SRR5950373 PRJNA398089 SRR5936149 human_gut
+MSM9VZES PRJNA398089 SRR5950374 PRJNA398089 SRR5936148 human_gut
+MSM79H7G PRJNA398089 SRR5950375 PRJNA398089 SRR5936152 human_gut
+PSM7J13M PRJNA398089 SRR5950376 PRJNA398089 SRR5936151 human_gut
+CSM9X1XU PRJNA398089 SRR5950377 PRJNA398089 SRR5950643 human_gut
+HSM7J4KK PRJNA398089 SRR5950378 PRJNA398089 SRR5935848 human_gut
+MSM9VZPH PRJNA398089 SRR5950379 PRJNA398089 SRR5935845 human_gut
+CSM7KOTS PRJNA398089 SRR5950380 PRJNA398089 SRR5950688 human_gut
+PSMA265F PRJNA398089 SRR5950381 PRJNA398089 SRR5946806 human_gut
+HSMA33LB PRJNA398089 SRR5950382 PRJNA398089 SRR5947067 human_gut
+MSMA26AX PRJNA398089 SRR5950383 PRJNA398089 SRR5947066 human_gut
+MSM9VZHF PRJNA398089 SRR5950384 PRJNA398089 SRR5946895 human_gut
+PSMA265H PRJNA398089 SRR5950385 PRJNA398089 SRR5946896 human_gut
+HSMA33KU PRJNA398089 SRR5950386 PRJNA398089 SRR5946900 human_gut
+PSMA265B PRJNA398089 SRR5950387 PRJNA398089 SRR5946901 human_gut
+PSMA265T PRJNA398089 SRR5950388 PRJNA398089 SRR5946897 human_gut
+PSMA266M PRJNA398089 SRR5950389 PRJNA398089 SRR5946899 human_gut
+MSM9VZL9 PRJNA398089 SRR5950390 PRJNA398089 SRR5946923 human_gut
+PSMA267J PRJNA398089 SRR5950391 PRJNA398089 SRR5947065 human_gut
+MSMA26EJ PRJNA398089 SRR5950392 PRJNA398089 SRR5947064 human_gut
+HSMA33MZ PRJNA398089 SRR5950393 PRJNA398089 SRR5946963 human_gut
+MSM9VZNL PRJNA398089 SRR5950394 PRJNA398089 SRR5946966 human_gut
+PSMA266U PRJNA398089 SRR5950395 PRJNA398089 SRR5946810 human_gut
+PSM7J13E PRJNA398089 SRR5950396 PRJNA398089 SRR5935824 human_gut
+PSM7J141 PRJNA398089 SRR5950397 PRJNA398089 SRR5935823 human_gut
+MSM9VZOG PRJNA398089 SRR5950398 PRJNA398089 SRR5935849 human_gut
+MSM9VZF5 PRJNA398089 SRR5950399 PRJNA398089 SRR5935898 human_gut
+CSM7KOUN PRJNA398089 SRR5950400 PRJNA398089 SRR5950694 human_gut
+HSM7J4NU PRJNA398089 SRR5950401 PRJNA398089 SRR5946964 human_gut
+HSM7J4KA PRJNA398089 SRR5950402 PRJNA398089 SRR5935821 human_gut
+PSM7J16Y PRJNA398089 SRR5950403 PRJNA398089 SRR5935820 human_gut
+HSMA33JB PRJNA398089 SRR5950404 PRJNA398089 SRR5946928 human_gut
+MSM9VZOS PRJNA398089 SRR5950405 PRJNA398089 SRR5946927 human_gut
+MSMA26EH PRJNA398089 SRR5950406 PRJNA398089 SRR5946655 human_gut
+MSM9VZHN PRJNA398089 SRR5950407 PRJNA398089 SRR5946670 human_gut
+HSMA33MA PRJNA398089 SRR5950408 PRJNA398089 SRR5946906 human_gut
+MSM9VZJ3 PRJNA398089 SRR5950409 PRJNA398089 SRR5946902 human_gut
+MSM9VZHX PRJNA398089 SRR5950410 PRJNA398089 SRR5946672 human_gut
+PSMA263W PRJNA398089 SRR5950411 PRJNA398089 SRR5946673 human_gut
+PSMA265J PRJNA398089 SRR5950412 PRJNA398089 SRR5946610 human_gut
+HSMA33OJ PRJNA398089 SRR5950413 PRJNA398089 SRR5946675 human_gut
+HSMA33OV PRJNA398089 SRR5950414 PRJNA398089 SRR5946739 human_gut
+PSMA266Y PRJNA398089 SRR5950415 PRJNA398089 SRR5946904 human_gut
+MSM9VZIQ PRJNA398089 SRR5950416 PRJNA398089 SRR5946903 human_gut
+PSM7J14P PRJNA398089 SRR5950417 PRJNA398089 SRR5946859 human_gut
+HSMA33IE PRJNA398089 SRR5950418 PRJNA398089 SRR5946860 human_gut
+PSM7J154 PRJNA398089 SRR5950419 PRJNA398089 SRR5946861 human_gut
+MSM9VZLJ PRJNA398089 SRR5950420 PRJNA398089 SRR5946971 human_gut
+HSMA33NO PRJNA398089 SRR5950421 PRJNA398089 SRR5946972 human_gut
+MSM9VZOY PRJNA398089 SRR5950422 PRJNA398089 SRR5946968 human_gut
+MSM9VZMU PRJNA398089 SRR5950423 PRJNA398089 SRR5946969 human_gut
+PSM7J15Q PRJNA398089 SRR5950424 PRJNA398089 SRR5946970 human_gut
+CSM7KORG PRJNA398089 SRR5950425 PRJNA398089 SRR5950750 human_gut
+MSM7J16J PRJNA398089 SRR5950426 PRJNA398089 SRR5946965 human_gut
+PSM7J15A PRJNA398089 SRR5950427 PRJNA398089 SRR5935922 human_gut
+MSM79H8L PRJNA398089 SRR5950428 PRJNA398089 SRR5935924 human_gut
+MSMB4LZ4 PRJNA398089 SRR5950429 PRJNA398089 SRR5946638 human_gut
+CSM7KOTQ PRJNA398089 SRR5950430 PRJNA398089 SRR5950604 human_gut
+PSM7J14X PRJNA398089 SRR5950431 PRJNA398089 SRR5935771 human_gut
+HSM7J4OP PRJNA398089 SRR5950432 PRJNA398089 SRR5935927 human_gut
+HSM7J4NS PRJNA398089 SRR5950433 PRJNA398089 SRR5935925 human_gut
+CSM7KORK PRJNA398089 SRR5950434 PRJNA398089 SRR5950600 human_gut
+MSM9VZFR PRJNA398089 SRR5950435 PRJNA398089 SRR5935929 human_gut
+HSM7J4OE PRJNA398089 SRR5950436 PRJNA398089 SRR5935928 human_gut
+MSM9VZLN PRJNA398089 SRR5950437 PRJNA398089 SRR5935926 human_gut
+CSM9X1ZC PRJNA398089 SRR5950438 PRJNA398089 SRR5950786 human_gut
+MSMA26DO PRJNA398089 SRR5950439 PRJNA398089 SRR5946955 human_gut
+HSMA33MG PRJNA398089 SRR5950440 PRJNA398089 SRR5946956 human_gut
+HSMA33L1 PRJNA398089 SRR5950441 PRJNA398089 SRR5946960 human_gut
+MSM9VZIU PRJNA398089 SRR5950442 PRJNA398089 SRR5946961 human_gut
+MSMA26ET PRJNA398089 SRR5950443 PRJNA398089 SRR5946962 human_gut
+HSMA33MS PRJNA398089 SRR5950444 PRJNA398089 SRR5946959 human_gut
+MSM9VZHT PRJNA398089 SRR5950445 PRJNA398089 SRR5946952 human_gut
+PSMA2675 PRJNA398089 SRR5950446 PRJNA398089 SRR5946953 human_gut
+HSMA33PL PRJNA398089 SRR5950447 PRJNA398089 SRR5946721 human_gut
+MSM9VZPL PRJNA398089 SRR5950448 PRJNA398089 SRR5946856 human_gut
+CSMAG78W PRJNA398089 SRR5950449 PRJNA398089 SRR5950490 human_gut
+HSM7J4O9 PRJNA398089 SRR5950450 PRJNA398089 SRR5946857 human_gut
+PSM7J161 PRJNA398089 SRR5950451 PRJNA398089 SRR5946864 human_gut
+HSM7J4OT PRJNA398089 SRR5950452 PRJNA398089 SRR5946863 human_gut
+MSM9VZOK PRJNA398089 SRR5950453 PRJNA398089 SRR5946866 human_gut
+MSM9VZNX PRJNA398089 SRR5950454 PRJNA398089 SRR5946862 human_gut
+CSM7KOTU PRJNA398089 SRR5950455 PRJNA398089 SRR5950754 human_gut
+CSM7KOTK PRJNA398089 SRR5950456 PRJNA398089 SRR5950760 human_gut
+CSM7KOOF PRJNA398089 SRR5950457 PRJNA398089 SRR5950761 human_gut
+MSMA26DI PRJNA398089 SRR5950458 PRJNA398089 SRR5946958 human_gut
+PSMA266Q PRJNA398089 SRR5950459 PRJNA398089 SRR5946957 human_gut
+CSM9X21J PRJNA398089 SRR5950460 PRJNA398089 SRR5950788 human_gut
+MSMA26BB PRJNA398089 SRR5950461 PRJNA398089 SRR5947072 human_gut
+HSMA33NQ PRJNA398089 SRR5950462 PRJNA398089 SRR5947076 human_gut
+MSMA26EL PRJNA398089 SRR5950463 PRJNA398089 SRR5947073 human_gut
+MSMA26AZ PRJNA398089 SRR5950464 PRJNA398089 SRR5947071 human_gut
+HSMA33ME PRJNA398089 SRR5950465 PRJNA398089 SRR5947068 human_gut
+CSM9X21T PRJNA398089 SRR5950466 PRJNA398089 SRR5950655 human_gut
+MSM9VZGY PRJNA398089 SRR5950467 PRJNA398089 SRR5947069 human_gut
+MSM9VZI2 PRJNA398089 SRR5950468 PRJNA398089 SRR5947070 human_gut
+CSM9X213 PRJNA398089 SRR5950469 PRJNA398089 SRR5950619 human_gut
+MSM9VZLV PRJNA398089 SRR5950470 PRJNA398089 SRR5947082 human_gut
+PSM7J171 PRJNA398089 SRR5963912 PRJNA398089 SRR5946967 human_gut
+PSM6XBVI PRJNA398089 SRR5963913 PRJNA398089 SRR5936034 human_gut
+PSM7J12D PRJNA398089 SRR5963914 PRJNA398089 SRR5936150 human_gut
+PSM6XBSO PRJNA398089 SRR5963915 PRJNA398089 SRR5936032 human_gut
+PSM6XBT3 PRJNA398089 SRR5963916 PRJNA398089 SRR5936057 human_gut
+PSM6XBSI PRJNA398089 SRR5963917 PRJNA398089 SRR5936074 human_gut
+PSM6XBSM PRJNA398089 SRR5963918 PRJNA398089 SRR5950713 human_gut
+MSM79HDQ_TR PRJNA398089 SRR5963920 PRJNA398089 SRR5962901 human_gut
+MSM79HDG_TR PRJNA398089 SRR5963921 PRJNA398089 SRR5962900 human_gut
+MSM9VZEK_TR PRJNA398089 SRR5963922 PRJNA398089 SRR5962903 human_gut
+MSM9VZMA_TR PRJNA398089 SRR5963923 PRJNA398089 SRR5962902 human_gut
+PSMA265J_TR PRJNA398089 SRR5963924 PRJNA398089 SRR5962904 human_gut
+MSMA26AZ_TR PRJNA398089 SRR5963925 PRJNA398089 SRR5962908 human_gut
+HSMA33LH_TR PRJNA398089 SRR5963926 PRJNA398089 SRR5962907 human_gut
+MSM9VZNH_TR PRJNA398089 SRR5963927 PRJNA398089 SRR5962906 human_gut
+HSMA33RX_TR PRJNA398089 SRR5963928 PRJNA398089 SRR5962885 human_gut
+PSM6XBQY_TR PRJNA398089 SRR5963929 PRJNA398089 SRR5962895 human_gut
+CSM79HLA_TR PRJNA398089 SRR5963930 PRJNA398089 SRR5962894 human_gut
+HSM6XRVC_TR PRJNA398089 SRR5963931 PRJNA398089 SRR5962893 human_gut
+HSM5MD6A_TR PRJNA398089 SRR5963932 PRJNA398089 SRR5962891 human_gut
+PSM6XBRK_TR PRJNA398089 SRR5963933 PRJNA398089 SRR5962890 human_gut
+CSM67UEW_TR PRJNA398089 SRR5963934 PRJNA398089 SRR5962889 human_gut
+HSM67VEM_TR PRJNA398089 SRR5963935 PRJNA398089 SRR5962892 human_gut
+CSM67UDR_TR PRJNA398089 SRR5963936 PRJNA398089 SRR5962888 human_gut
+PSM6XBSU_TR PRJNA398089 SRR5963937 PRJNA398089 SRR5962897 human_gut
+MSM79HF9_TR PRJNA398089 SRR5963938 PRJNA398089 SRR5962898 human_gut
+PSM6XBRK PRJNA398089 SRR5963939 PRJNA398089 SRR5935960 human_gut
+PSM6XBQY PRJNA398089 SRR5963940 PRJNA398089 SRR5935809 human_gut
+MSMA26EP PRJNA398089 SRR5963941 PRJNA398089 SRR5946720 human_gut
+MSM9VZOW PRJNA398089 SRR5963942 PRJNA398089 SRR5946733 human_gut
+PSM6XBQU PRJNA398089 SRR5963943 PRJNA398089 SRR5936007 human_gut
+MSMAPC5Z PRJNA398089 SRR5963944 PRJNA398089 SRR5946832 human_gut
+MSM6J2M3 PRJNA398089 SRR5963945 PRJNA398089 SRR5935963 human_gut
+MSM6J2K6 PRJNA398089 SRR5963946 PRJNA398089 SRR5935938 human_gut
+MSM6J2Q1 PRJNA398089 SRR5963947 PRJNA398089 SRR5936013 human_gut
+MSM6J2PS PRJNA398089 SRR5963948 PRJNA398089 SRR5936231 human_gut
+Amar_Carlsberg-38I-DV129-2-N2 PRJNA632343 SRR11781625 PRJNA632343 SRR11781636 snail_gut
+Amar_Carlsberg-38I-DV131-6-Intestine PRJNA632343 SRR11781640 PRJNA632343 SRR11781639 snail_gut
+Amar_Carlsberg-38I-DV129-15-Intestine PRJNA632343 SRR11781643 PRJNA632343 SRR11781642 snail_gut
+Amar_Carlsberg-38I-DV129-5-Intestine PRJNA632343 SRR11781646 PRJNA632343 SRR11781645 snail_gut
+BATS_256_C3_1B PRJNA242360 SRR1230757 PRJNA242360 SRR1230754 ocean
+BATS_261_C2_B2 PRJNA242360 SRR1230758 PRJNA242360 SRR1230755 ocean
+BATS_261_C8_F2 PRJNA242360 SRR1230759 PRJNA242360 SRR1230756 ocean
+BATS_256_C10_5A PRJNA242360 SRR1238005 PRJNA242360 SRR1230729 ocean
+ES_08072017_CAT2012July PRJEB12234 ERR2088997 PRJEB12234 ERR2094669 ocean
+ES_08072017_CAT2012October PRJEB12234 ERR2088998 PRJEB12234 ERR2094670 ocean
+ES_08072017_CAT2013January PRJEB12234 ERR2088999 PRJEB12234 ERR2094671 ocean
+ES_08072017_CAT2013April PRJEB12234 ERR2089000 PRJEB12234 ERR2094672 ocean
+ES_08072017_POLA2012July PRJEB12234 ERR2089001 PRJEB12234 ERR2094673 ocean
+ES_08072017_POLA2012October PRJEB12234 ERR2089002 PRJEB12234 ERR2094674 ocean
+ES_08072017_POLA2013January PRJEB12234 ERR2089003 PRJEB12234 ERR2094675 ocean
+ES_08072017_POLA2013April PRJEB12234 ERR2089004 PRJEB12234 ERR2094676 ocean
+ES_08072017_SPOT2012July PRJEB12234 ERR2089005 PRJEB12234 ERR2094677 ocean
+ES_08072017_SPOT2012October PRJEB12234 ERR2089006 PRJEB12234 ERR2094678 ocean
+ES_08072017_SPOT2013January PRJEB12234 ERR2089007 PRJEB12234 ERR2094679 ocean
+ES_08072017_SPOT2013April PRJEB12234 ERR2089008 PRJEB12234 ERR2094680 ocean
+ES_08072017_neg_reads PRJEB12234 ERR2089009 PRJEB12234 ERR2097133 ocean
+E2A PRJNA340003 SRR4342131 PRJNA340003 SRR4342135 ocean
+E4 PRJNA340003 SRR4342132 PRJNA340003 SRR4342136 ocean
+D1 PRJNA340003 SRR4342137 PRJNA340003 SRR4342129 ocean
+D2 PRJNA340003 SRR4342138 PRJNA340003 SRR4342130 ocean
+D3 PRJNA340003 SRR4342139 PRJNA340003 SRR4342133 ocean
+E2 PRJNA340003 SRR4342140 PRJNA340003 SRR4342134 ocean
diff --git a/inputs/metadata-parse-paired-mgx-mtx.ipynb b/inputs/metadata-parse-paired-mgx-mtx.ipynb
new file mode 100644
index 0000000..3d17930
--- /dev/null
+++ b/inputs/metadata-parse-paired-mgx-mtx.ipynb
@@ -0,0 +1,5114 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "3910cfa9",
+ "metadata": {},
+ "source": [
+ "# Generate a metadata table specifying paired metagenomes and metatranscriptomes from diverse biomes using publicly available data\n",
+ "\n",
+ "This notebook processes metadata associated with paired metagenome and metatranscriptome samples. \n",
+ "When possible, the ENA BioProject pages were parsed to determine sample pairs.\n",
+ "This was done programmatically using the following URL (with more metadata fields specified), which queries the ENA API to return a TSV with one row per run in the specified BioProject.\n",
+ "\n",
+ "`\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA406858&result=read_run&fields=study_accession,first_created&format=tsv&download=true&limit=0\"`\n",
+ "\n",
+ "\n",
+ "By and large, each project encoded sample identifiers and pairing information differently, so each parsing chunk is unique.\n",
+ "Given this, I chose not to create a parsing function.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "6232c715",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "setwd(\"..\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "ca7b2c35",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "library(dplyr)\n",
+ "library(readr)\n",
+ "library(janitor)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3206624e",
+ "metadata": {},
+ "source": [
+ "## PRJNA406858"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "b79e6ec7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna406858 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA406858&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ " show_col_types = F)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "32481186",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
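+ "<table class=\"dataframe\">\n",
+ "<caption>A tibble: 6 × 3</caption>\n",
+ "<thead>\n",
+ "\t<tr><th scope=col>study_accession</th><th scope=col>run_accession</th><th scope=col>library_name</th></tr>\n",
+ "\t<tr><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th></tr>\n",
+ "</thead>\n",
+ "<tbody>\n",
+ "\t<tr><td>PRJNA406858</td><td>SRR6032600</td><td>C_2_cDNA_1</td></tr>\n",
+ "\t<tr><td>PRJNA406858</td><td>SRR6032601</td><td>B_2_1     </td></tr>\n",
+ "\t<tr><td>PRJNA406858</td><td>SRR6032602</td><td>C_2_1     </td></tr>\n",
+ "\t<tr><td>PRJNA406858</td><td>SRR6032603</td><td>E_2_1     </td></tr>\n",
+ "\t<tr><td>PRJNA406858</td><td>SRR6032604</td><td>B_2_cDNA_1</td></tr>\n",
+ "\t<tr><td>PRJNA406858</td><td>SRR6032605</td><td>E_2_cDNA_1</td></tr>\n",
+ "</tbody>\n",
+ "</table>\n"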
+ ],
+ "text/latex": [
+ "A tibble: 6 × 3\n",
+ "\\begin{tabular}{lll}\n",
+ " study\\_accession & run\\_accession & library\\_name\\\\\n",
+ " & & \\\\\n",
+ "\\hline\n",
+ "\t PRJNA406858 & SRR6032600 & C\\_2\\_cDNA\\_1\\\\\n",
+ "\t PRJNA406858 & SRR6032601 & B\\_2\\_1 \\\\\n",
+ "\t PRJNA406858 & SRR6032602 & C\\_2\\_1 \\\\\n",
+ "\t PRJNA406858 & SRR6032603 & E\\_2\\_1 \\\\\n",
+ "\t PRJNA406858 & SRR6032604 & B\\_2\\_cDNA\\_1\\\\\n",
+ "\t PRJNA406858 & SRR6032605 & E\\_2\\_cDNA\\_1\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 6 × 3\n",
+ "\n",
+ "| study_accession <chr> | run_accession <chr> | library_name <chr> |\n",
+ "|---|---|---|\n",
+ "| PRJNA406858 | SRR6032600 | C_2_cDNA_1 |\n",
+ "| PRJNA406858 | SRR6032601 | B_2_1 |\n",
+ "| PRJNA406858 | SRR6032602 | C_2_1 |\n",
+ "| PRJNA406858 | SRR6032603 | E_2_1 |\n",
+ "| PRJNA406858 | SRR6032604 | B_2_cDNA_1 |\n",
+ "| PRJNA406858 | SRR6032605 | E_2_cDNA_1 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " study_accession run_accession library_name\n",
+ "1 PRJNA406858 SRR6032600 C_2_cDNA_1 \n",
+ "2 PRJNA406858 SRR6032601 B_2_1 \n",
+ "3 PRJNA406858 SRR6032602 C_2_1 \n",
+ "4 PRJNA406858 SRR6032603 E_2_1 \n",
+ "5 PRJNA406858 SRR6032604 B_2_cDNA_1 \n",
+ "6 PRJNA406858 SRR6032605 E_2_cDNA_1 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "prjna406858 <- prjna406858 %>% \n",
+ " arrange(sample_alias) %>%\n",
+ " select(study_accession, run_accession, library_name)\n",
+ "\n",
+ "head(prjna406858)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "998e512d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna406858 <- prjna406858 %>%\n",
+ " mutate(sample_name = gsub(\"_cDNA\", \"\", library_name))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "59810eff",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
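+ "<table class=\"dataframe\">\n",
+ "<caption>A tibble: 3 × 6</caption>\n",
+ "<thead>\n",
+ "\t<tr><th scope=col>sample_name</th><th scope=col>mtx_study_accession</th><th scope=col>mtx_run_accession</th><th scope=col>mgx_study_accession</th><th scope=col>mgx_run_accession</th><th scope=col>sample_type</th></tr>\n",
+ "\t<tr><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th></tr>\n",
+ "</thead>\n",
+ "<tbody>\n",
+ "\t<tr><td>C_2_1</td><td>PRJNA406858</td><td>SRR6032600</td><td>PRJNA406858</td><td>SRR6032602</td><td>activated_sludge</td></tr>\n",
+ "\t<tr><td>B_2_1</td><td>PRJNA406858</td><td>SRR6032604</td><td>PRJNA406858</td><td>SRR6032601</td><td>activated_sludge</td></tr>\n",
+ "\t<tr><td>E_2_1</td><td>PRJNA406858</td><td>SRR6032605</td><td>PRJNA406858</td><td>SRR6032603</td><td>activated_sludge</td></tr>\n",
+ "</tbody>\n",
+ "</table>\n"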
+ ],
+ "text/latex": [
+ "A tibble: 3 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t C\\_2\\_1 & PRJNA406858 & SRR6032600 & PRJNA406858 & SRR6032602 & activated\\_sludge\\\\\n",
+ "\t B\\_2\\_1 & PRJNA406858 & SRR6032604 & PRJNA406858 & SRR6032601 & activated\\_sludge\\\\\n",
+ "\t E\\_2\\_1 & PRJNA406858 & SRR6032605 & PRJNA406858 & SRR6032603 & activated\\_sludge\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 3 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| C_2_1 | PRJNA406858 | SRR6032600 | PRJNA406858 | SRR6032602 | activated_sludge |\n",
+ "| B_2_1 | PRJNA406858 | SRR6032604 | PRJNA406858 | SRR6032601 | activated_sludge |\n",
+ "| E_2_1 | PRJNA406858 | SRR6032605 | PRJNA406858 | SRR6032603 | activated_sludge |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 C_2_1 PRJNA406858 SRR6032600 PRJNA406858 \n",
+ "2 B_2_1 PRJNA406858 SRR6032604 PRJNA406858 \n",
+ "3 E_2_1 PRJNA406858 SRR6032605 PRJNA406858 \n",
+ " mgx_run_accession sample_type \n",
+ "1 SRR6032602 activated_sludge\n",
+ "2 SRR6032601 activated_sludge\n",
+ "3 SRR6032603 activated_sludge"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "prjna406858_mtx <- prjna406858 %>%\n",
+ " filter(grepl(\"cDNA\", library_name)) %>%\n",
+ " select(sample_name,\n",
+ " mtx_study_accession = study_accession,\n",
+ " mtx_run_accession = run_accession)\n",
+ "\n",
+ "prjna406858_mgx <- prjna406858 %>%\n",
+ " filter(!grepl(\"cDNA\", library_name)) %>%\n",
+ " select(mgx_study_accession = study_accession,\n",
+ " mgx_run_accession = run_accession, \n",
+ " sample_name)\n",
+ "\n",
+ "prjna406858_mtx_vs_mgx <- left_join(prjna406858_mtx, prjna406858_mgx, by = \"sample_name\") %>%\n",
+ " mutate(sample_type = \"activated_sludge\")\n",
+ "\n",
+ "prjna406858_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "efa62e72",
+ "metadata": {},
+ "source": [
+ "## PRJNA448333"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "ab5a6d12",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna448333 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA448333&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ " show_col_types = F)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "3a4dec6c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 6 × 4\n",
+ "\n",
+ "\tstudy_accession | run_accession | library_name | sample_alias |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tPRJNA448333 | SRR8397906 | Metagenome 101 | Rumen microbiome of beef cattle 101 |
\n",
+ "\tPRJNA448333 | SRR8399431 | Total-RNA-based metatranscriptome 101 | Rumen microbiome of beef cattle 101 |
\n",
+ "\tPRJNA448333 | SRR8416057 | mRNA-enriched metatranscriptome 101 | Rumen microbiome of beef cattle 101 |
\n",
+ "\tPRJNA448333 | SRR8404214 | Metagenome 103 | Rumen microbiome of beef cattle 103 |
\n",
+ "\tPRJNA448333 | SRR8416058 | mRNA-enriched metatranscriptome 103 | Rumen microbiome of beef cattle 103 |
\n",
+ "\tPRJNA448333 | SRR8420492 | Total-RNA-based metatranscriptome 103 | Rumen microbiome of beef cattle 103 |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 6 × 4\n",
+ "\\begin{tabular}{llll}\n",
+ " study\\_accession & run\\_accession & library\\_name & sample\\_alias\\\\\n",
+ " & & & \\\\\n",
+ "\\hline\n",
+ "\t PRJNA448333 & SRR8397906 & Metagenome 101 & Rumen microbiome of beef cattle 101\\\\\n",
+ "\t PRJNA448333 & SRR8399431 & Total-RNA-based metatranscriptome 101 & Rumen microbiome of beef cattle 101\\\\\n",
+ "\t PRJNA448333 & SRR8416057 & mRNA-enriched metatranscriptome 101 & Rumen microbiome of beef cattle 101\\\\\n",
+ "\t PRJNA448333 & SRR8404214 & Metagenome 103 & Rumen microbiome of beef cattle 103\\\\\n",
+ "\t PRJNA448333 & SRR8416058 & mRNA-enriched metatranscriptome 103 & Rumen microbiome of beef cattle 103\\\\\n",
+ "\t PRJNA448333 & SRR8420492 & Total-RNA-based metatranscriptome 103 & Rumen microbiome of beef cattle 103\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 6 × 4\n",
+ "\n",
+ "| study_accession <chr> | run_accession <chr> | library_name <chr> | sample_alias <chr> |\n",
+ "|---|---|---|---|\n",
+ "| PRJNA448333 | SRR8397906 | Metagenome 101 | Rumen microbiome of beef cattle 101 |\n",
+ "| PRJNA448333 | SRR8399431 | Total-RNA-based metatranscriptome 101 | Rumen microbiome of beef cattle 101 |\n",
+ "| PRJNA448333 | SRR8416057 | mRNA-enriched metatranscriptome 101 | Rumen microbiome of beef cattle 101 |\n",
+ "| PRJNA448333 | SRR8404214 | Metagenome 103 | Rumen microbiome of beef cattle 103 |\n",
+ "| PRJNA448333 | SRR8416058 | mRNA-enriched metatranscriptome 103 | Rumen microbiome of beef cattle 103 |\n",
+ "| PRJNA448333 | SRR8420492 | Total-RNA-based metatranscriptome 103 | Rumen microbiome of beef cattle 103 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " study_accession run_accession library_name \n",
+ "1 PRJNA448333 SRR8397906 Metagenome 101 \n",
+ "2 PRJNA448333 SRR8399431 Total-RNA-based metatranscriptome 101\n",
+ "3 PRJNA448333 SRR8416057 mRNA-enriched metatranscriptome 101 \n",
+ "4 PRJNA448333 SRR8404214 Metagenome 103 \n",
+ "5 PRJNA448333 SRR8416058 mRNA-enriched metatranscriptome 103 \n",
+ "6 PRJNA448333 SRR8420492 Total-RNA-based metatranscriptome 103\n",
+ " sample_alias \n",
+ "1 Rumen microbiome of beef cattle 101\n",
+ "2 Rumen microbiome of beef cattle 101\n",
+ "3 Rumen microbiome of beef cattle 101\n",
+ "4 Rumen microbiome of beef cattle 103\n",
+ "5 Rumen microbiome of beef cattle 103\n",
+ "6 Rumen microbiome of beef cattle 103"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "prjna448333 <- prjna448333 %>% \n",
+ " arrange(sample_alias) %>%\n",
+ " select(study_accession, run_accession, library_name, sample_alias)\n",
+ "\n",
+ "head(prjna448333)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6cc53500",
+ "metadata": {},
+ "source": [
+    "PRJNA448333 has both total-RNA-based and mRNA-enriched metatranscriptomes. I kept only the mRNA-enriched libraries (dropping the total-RNA ones), as these likely contain more of the sequences we're interested in."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "535d6fcf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna448333_mtx <- prjna448333 %>% \n",
+ " filter(grepl(\"mRNA-enriched\", library_name)) %>%\n",
+ " select(sample_name = sample_alias,\n",
+ " mtx_study_accession = study_accession,\n",
+ " mtx_run_accession = run_accession)\n",
+ "\n",
+ "prjna448333_mgx <- prjna448333 %>% \n",
+ " filter(grepl(\"Metagenome\", library_name)) %>%\n",
+ " select(sample_name = sample_alias,\n",
+ " mgx_study_accession = study_accession,\n",
+ " mgx_run_accession = run_accession)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "958b2644",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna448333_mtx_vs_mgx <- left_join(prjna448333_mtx, prjna448333_mgx, by = c(\"sample_name\")) %>%\n",
+ " mutate(sample_name = make_clean_names(sample_name),\n",
+ " sample_type = \"cattle_rumen\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "3da0c395",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 48 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\trumen_microbiome_of_beef_cattle_101 | PRJNA448333 | SRR8416057 | PRJNA448333 | SRR8397906 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_103 | PRJNA448333 | SRR8416058 | PRJNA448333 | SRR8404214 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_104 | PRJNA448333 | SRR8416055 | PRJNA448333 | SRR8397905 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_105 | PRJNA448333 | SRR8416056 | PRJNA448333 | SRR8397904 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_106 | PRJNA448333 | SRR8416061 | PRJNA448333 | SRR8397903 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_107 | PRJNA448333 | SRR8416062 | PRJNA448333 | SRR8397910 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_112 | PRJNA448333 | SRR8416059 | PRJNA448333 | SRR8397909 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_201 | PRJNA448333 | SRR8416060 | PRJNA448333 | SRR8397908 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_202 | PRJNA448333 | SRR8416064 | PRJNA448333 | SRR8397907 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_203 | PRJNA448333 | SRR8416065 | PRJNA448333 | SRR8397902 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_205 | PRJNA448333 | SRR8416069 | PRJNA448333 | SRR8397901 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_206 | PRJNA448333 | SRR8416070 | PRJNA448333 | SRR8397884 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_207 | PRJNA448333 | SRR8416067 | PRJNA448333 | SRR8397883 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_208 | PRJNA448333 | SRR8416068 | PRJNA448333 | SRR8397886 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_209 | PRJNA448333 | SRR8416073 | PRJNA448333 | SRR8397885 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_210 | PRJNA448333 | SRR8416074 | PRJNA448333 | SRR8397888 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_301 | PRJNA448333 | SRR8416071 | PRJNA448333 | SRR8397887 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_302 | PRJNA448333 | SRR8416072 | PRJNA448333 | SRR8397890 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_303 | PRJNA448333 | SRR8416075 | PRJNA448333 | SRR8397889 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_304 | PRJNA448333 | SRR8416076 | PRJNA448333 | SRR8397882 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_305 | PRJNA448333 | SRR8416092 | PRJNA448333 | SRR8397881 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_307 | PRJNA448333 | SRR8416091 | PRJNA448333 | SRR8397917 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_308 | PRJNA448333 | SRR8416090 | PRJNA448333 | SRR8397918 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_401 | PRJNA448333 | SRR8416089 | PRJNA448333 | SRR8397919 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_402 | PRJNA448333 | SRR8416096 | PRJNA448333 | SRR8397920 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_403 | PRJNA448333 | SRR8416095 | PRJNA448333 | SRR8397913 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_404 | PRJNA448333 | SRR8416094 | PRJNA448333 | SRR8397914 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_407 | PRJNA448333 | SRR8416093 | PRJNA448333 | SRR8397915 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_501 | PRJNA448333 | SRR8416088 | PRJNA448333 | SRR8397916 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_503 | PRJNA448333 | SRR8416087 | PRJNA448333 | SRR8397911 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_505 | PRJNA448333 | SRR8416102 | PRJNA448333 | SRR8397912 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_506 | PRJNA448333 | SRR8416083 | PRJNA448333 | SRR8397894 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_601 | PRJNA448333 | SRR8416063 | PRJNA448333 | SRR8397893 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_604 | PRJNA448333 | SRR8416066 | PRJNA448333 | SRR8397892 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_608 | PRJNA448333 | SRR8416097 | PRJNA448333 | SRR8397891 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_611 | PRJNA448333 | SRR8416098 | PRJNA448333 | SRR8397898 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_612 | PRJNA448333 | SRR8416099 | PRJNA448333 | SRR8397897 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_701 | PRJNA448333 | SRR8416100 | PRJNA448333 | SRR8397896 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_702 | PRJNA448333 | SRR8416077 | PRJNA448333 | SRR8397895 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_703 | PRJNA448333 | SRR8416078 | PRJNA448333 | SRR8397900 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_704 | PRJNA448333 | SRR8416082 | PRJNA448333 | SRR8397899 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_705 | PRJNA448333 | SRR8416081 | PRJNA448333 | SRR8397926 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_706 | PRJNA448333 | SRR8416086 | PRJNA448333 | SRR8397927 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_707 | PRJNA448333 | SRR8416084 | PRJNA448333 | SRR8397924 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_708 | PRJNA448333 | SRR8416101 | PRJNA448333 | SRR8397925 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_709 | PRJNA448333 | SRR8416085 | PRJNA448333 | SRR8397922 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_710 | PRJNA448333 | SRR8416080 | PRJNA448333 | SRR8397923 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_712 | PRJNA448333 | SRR8416079 | PRJNA448333 | SRR8397921 | cattle_rumen |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 48 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_101 & PRJNA448333 & SRR8416057 & PRJNA448333 & SRR8397906 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_103 & PRJNA448333 & SRR8416058 & PRJNA448333 & SRR8404214 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_104 & PRJNA448333 & SRR8416055 & PRJNA448333 & SRR8397905 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_105 & PRJNA448333 & SRR8416056 & PRJNA448333 & SRR8397904 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_106 & PRJNA448333 & SRR8416061 & PRJNA448333 & SRR8397903 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_107 & PRJNA448333 & SRR8416062 & PRJNA448333 & SRR8397910 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_112 & PRJNA448333 & SRR8416059 & PRJNA448333 & SRR8397909 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_201 & PRJNA448333 & SRR8416060 & PRJNA448333 & SRR8397908 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_202 & PRJNA448333 & SRR8416064 & PRJNA448333 & SRR8397907 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_203 & PRJNA448333 & SRR8416065 & PRJNA448333 & SRR8397902 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_205 & PRJNA448333 & SRR8416069 & PRJNA448333 & SRR8397901 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_206 & PRJNA448333 & SRR8416070 & PRJNA448333 & SRR8397884 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_207 & PRJNA448333 & SRR8416067 & PRJNA448333 & SRR8397883 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_208 & PRJNA448333 & SRR8416068 & PRJNA448333 & SRR8397886 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_209 & PRJNA448333 & SRR8416073 & PRJNA448333 & SRR8397885 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_210 & PRJNA448333 & SRR8416074 & PRJNA448333 & SRR8397888 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_301 & PRJNA448333 & SRR8416071 & PRJNA448333 & SRR8397887 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_302 & PRJNA448333 & SRR8416072 & PRJNA448333 & SRR8397890 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_303 & PRJNA448333 & SRR8416075 & PRJNA448333 & SRR8397889 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_304 & PRJNA448333 & SRR8416076 & PRJNA448333 & SRR8397882 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_305 & PRJNA448333 & SRR8416092 & PRJNA448333 & SRR8397881 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_307 & PRJNA448333 & SRR8416091 & PRJNA448333 & SRR8397917 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_308 & PRJNA448333 & SRR8416090 & PRJNA448333 & SRR8397918 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_401 & PRJNA448333 & SRR8416089 & PRJNA448333 & SRR8397919 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_402 & PRJNA448333 & SRR8416096 & PRJNA448333 & SRR8397920 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_403 & PRJNA448333 & SRR8416095 & PRJNA448333 & SRR8397913 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_404 & PRJNA448333 & SRR8416094 & PRJNA448333 & SRR8397914 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_407 & PRJNA448333 & SRR8416093 & PRJNA448333 & SRR8397915 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_501 & PRJNA448333 & SRR8416088 & PRJNA448333 & SRR8397916 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_503 & PRJNA448333 & SRR8416087 & PRJNA448333 & SRR8397911 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_505 & PRJNA448333 & SRR8416102 & PRJNA448333 & SRR8397912 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_506 & PRJNA448333 & SRR8416083 & PRJNA448333 & SRR8397894 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_601 & PRJNA448333 & SRR8416063 & PRJNA448333 & SRR8397893 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_604 & PRJNA448333 & SRR8416066 & PRJNA448333 & SRR8397892 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_608 & PRJNA448333 & SRR8416097 & PRJNA448333 & SRR8397891 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_611 & PRJNA448333 & SRR8416098 & PRJNA448333 & SRR8397898 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_612 & PRJNA448333 & SRR8416099 & PRJNA448333 & SRR8397897 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_701 & PRJNA448333 & SRR8416100 & PRJNA448333 & SRR8397896 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_702 & PRJNA448333 & SRR8416077 & PRJNA448333 & SRR8397895 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_703 & PRJNA448333 & SRR8416078 & PRJNA448333 & SRR8397900 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_704 & PRJNA448333 & SRR8416082 & PRJNA448333 & SRR8397899 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_705 & PRJNA448333 & SRR8416081 & PRJNA448333 & SRR8397926 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_706 & PRJNA448333 & SRR8416086 & PRJNA448333 & SRR8397927 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_707 & PRJNA448333 & SRR8416084 & PRJNA448333 & SRR8397924 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_708 & PRJNA448333 & SRR8416101 & PRJNA448333 & SRR8397925 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_709 & PRJNA448333 & SRR8416085 & PRJNA448333 & SRR8397922 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_710 & PRJNA448333 & SRR8416080 & PRJNA448333 & SRR8397923 & cattle\\_rumen\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_712 & PRJNA448333 & SRR8416079 & PRJNA448333 & SRR8397921 & cattle\\_rumen\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 48 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| rumen_microbiome_of_beef_cattle_101 | PRJNA448333 | SRR8416057 | PRJNA448333 | SRR8397906 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_103 | PRJNA448333 | SRR8416058 | PRJNA448333 | SRR8404214 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_104 | PRJNA448333 | SRR8416055 | PRJNA448333 | SRR8397905 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_105 | PRJNA448333 | SRR8416056 | PRJNA448333 | SRR8397904 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_106 | PRJNA448333 | SRR8416061 | PRJNA448333 | SRR8397903 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_107 | PRJNA448333 | SRR8416062 | PRJNA448333 | SRR8397910 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_112 | PRJNA448333 | SRR8416059 | PRJNA448333 | SRR8397909 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_201 | PRJNA448333 | SRR8416060 | PRJNA448333 | SRR8397908 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_202 | PRJNA448333 | SRR8416064 | PRJNA448333 | SRR8397907 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_203 | PRJNA448333 | SRR8416065 | PRJNA448333 | SRR8397902 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_205 | PRJNA448333 | SRR8416069 | PRJNA448333 | SRR8397901 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_206 | PRJNA448333 | SRR8416070 | PRJNA448333 | SRR8397884 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_207 | PRJNA448333 | SRR8416067 | PRJNA448333 | SRR8397883 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_208 | PRJNA448333 | SRR8416068 | PRJNA448333 | SRR8397886 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_209 | PRJNA448333 | SRR8416073 | PRJNA448333 | SRR8397885 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_210 | PRJNA448333 | SRR8416074 | PRJNA448333 | SRR8397888 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_301 | PRJNA448333 | SRR8416071 | PRJNA448333 | SRR8397887 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_302 | PRJNA448333 | SRR8416072 | PRJNA448333 | SRR8397890 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_303 | PRJNA448333 | SRR8416075 | PRJNA448333 | SRR8397889 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_304 | PRJNA448333 | SRR8416076 | PRJNA448333 | SRR8397882 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_305 | PRJNA448333 | SRR8416092 | PRJNA448333 | SRR8397881 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_307 | PRJNA448333 | SRR8416091 | PRJNA448333 | SRR8397917 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_308 | PRJNA448333 | SRR8416090 | PRJNA448333 | SRR8397918 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_401 | PRJNA448333 | SRR8416089 | PRJNA448333 | SRR8397919 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_402 | PRJNA448333 | SRR8416096 | PRJNA448333 | SRR8397920 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_403 | PRJNA448333 | SRR8416095 | PRJNA448333 | SRR8397913 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_404 | PRJNA448333 | SRR8416094 | PRJNA448333 | SRR8397914 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_407 | PRJNA448333 | SRR8416093 | PRJNA448333 | SRR8397915 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_501 | PRJNA448333 | SRR8416088 | PRJNA448333 | SRR8397916 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_503 | PRJNA448333 | SRR8416087 | PRJNA448333 | SRR8397911 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_505 | PRJNA448333 | SRR8416102 | PRJNA448333 | SRR8397912 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_506 | PRJNA448333 | SRR8416083 | PRJNA448333 | SRR8397894 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_601 | PRJNA448333 | SRR8416063 | PRJNA448333 | SRR8397893 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_604 | PRJNA448333 | SRR8416066 | PRJNA448333 | SRR8397892 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_608 | PRJNA448333 | SRR8416097 | PRJNA448333 | SRR8397891 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_611 | PRJNA448333 | SRR8416098 | PRJNA448333 | SRR8397898 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_612 | PRJNA448333 | SRR8416099 | PRJNA448333 | SRR8397897 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_701 | PRJNA448333 | SRR8416100 | PRJNA448333 | SRR8397896 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_702 | PRJNA448333 | SRR8416077 | PRJNA448333 | SRR8397895 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_703 | PRJNA448333 | SRR8416078 | PRJNA448333 | SRR8397900 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_704 | PRJNA448333 | SRR8416082 | PRJNA448333 | SRR8397899 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_705 | PRJNA448333 | SRR8416081 | PRJNA448333 | SRR8397926 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_706 | PRJNA448333 | SRR8416086 | PRJNA448333 | SRR8397927 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_707 | PRJNA448333 | SRR8416084 | PRJNA448333 | SRR8397924 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_708 | PRJNA448333 | SRR8416101 | PRJNA448333 | SRR8397925 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_709 | PRJNA448333 | SRR8416085 | PRJNA448333 | SRR8397922 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_710 | PRJNA448333 | SRR8416080 | PRJNA448333 | SRR8397923 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_712 | PRJNA448333 | SRR8416079 | PRJNA448333 | SRR8397921 | cattle_rumen |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession\n",
+ "1 rumen_microbiome_of_beef_cattle_101 PRJNA448333 SRR8416057 \n",
+ "2 rumen_microbiome_of_beef_cattle_103 PRJNA448333 SRR8416058 \n",
+ "3 rumen_microbiome_of_beef_cattle_104 PRJNA448333 SRR8416055 \n",
+ "4 rumen_microbiome_of_beef_cattle_105 PRJNA448333 SRR8416056 \n",
+ "5 rumen_microbiome_of_beef_cattle_106 PRJNA448333 SRR8416061 \n",
+ "6 rumen_microbiome_of_beef_cattle_107 PRJNA448333 SRR8416062 \n",
+ "7 rumen_microbiome_of_beef_cattle_112 PRJNA448333 SRR8416059 \n",
+ "8 rumen_microbiome_of_beef_cattle_201 PRJNA448333 SRR8416060 \n",
+ "9 rumen_microbiome_of_beef_cattle_202 PRJNA448333 SRR8416064 \n",
+ "10 rumen_microbiome_of_beef_cattle_203 PRJNA448333 SRR8416065 \n",
+ "11 rumen_microbiome_of_beef_cattle_205 PRJNA448333 SRR8416069 \n",
+ "12 rumen_microbiome_of_beef_cattle_206 PRJNA448333 SRR8416070 \n",
+ "13 rumen_microbiome_of_beef_cattle_207 PRJNA448333 SRR8416067 \n",
+ "14 rumen_microbiome_of_beef_cattle_208 PRJNA448333 SRR8416068 \n",
+ "15 rumen_microbiome_of_beef_cattle_209 PRJNA448333 SRR8416073 \n",
+ "16 rumen_microbiome_of_beef_cattle_210 PRJNA448333 SRR8416074 \n",
+ "17 rumen_microbiome_of_beef_cattle_301 PRJNA448333 SRR8416071 \n",
+ "18 rumen_microbiome_of_beef_cattle_302 PRJNA448333 SRR8416072 \n",
+ "19 rumen_microbiome_of_beef_cattle_303 PRJNA448333 SRR8416075 \n",
+ "20 rumen_microbiome_of_beef_cattle_304 PRJNA448333 SRR8416076 \n",
+ "21 rumen_microbiome_of_beef_cattle_305 PRJNA448333 SRR8416092 \n",
+ "22 rumen_microbiome_of_beef_cattle_307 PRJNA448333 SRR8416091 \n",
+ "23 rumen_microbiome_of_beef_cattle_308 PRJNA448333 SRR8416090 \n",
+ "24 rumen_microbiome_of_beef_cattle_401 PRJNA448333 SRR8416089 \n",
+ "25 rumen_microbiome_of_beef_cattle_402 PRJNA448333 SRR8416096 \n",
+ "26 rumen_microbiome_of_beef_cattle_403 PRJNA448333 SRR8416095 \n",
+ "27 rumen_microbiome_of_beef_cattle_404 PRJNA448333 SRR8416094 \n",
+ "28 rumen_microbiome_of_beef_cattle_407 PRJNA448333 SRR8416093 \n",
+ "29 rumen_microbiome_of_beef_cattle_501 PRJNA448333 SRR8416088 \n",
+ "30 rumen_microbiome_of_beef_cattle_503 PRJNA448333 SRR8416087 \n",
+ "31 rumen_microbiome_of_beef_cattle_505 PRJNA448333 SRR8416102 \n",
+ "32 rumen_microbiome_of_beef_cattle_506 PRJNA448333 SRR8416083 \n",
+ "33 rumen_microbiome_of_beef_cattle_601 PRJNA448333 SRR8416063 \n",
+ "34 rumen_microbiome_of_beef_cattle_604 PRJNA448333 SRR8416066 \n",
+ "35 rumen_microbiome_of_beef_cattle_608 PRJNA448333 SRR8416097 \n",
+ "36 rumen_microbiome_of_beef_cattle_611 PRJNA448333 SRR8416098 \n",
+ "37 rumen_microbiome_of_beef_cattle_612 PRJNA448333 SRR8416099 \n",
+ "38 rumen_microbiome_of_beef_cattle_701 PRJNA448333 SRR8416100 \n",
+ "39 rumen_microbiome_of_beef_cattle_702 PRJNA448333 SRR8416077 \n",
+ "40 rumen_microbiome_of_beef_cattle_703 PRJNA448333 SRR8416078 \n",
+ "41 rumen_microbiome_of_beef_cattle_704 PRJNA448333 SRR8416082 \n",
+ "42 rumen_microbiome_of_beef_cattle_705 PRJNA448333 SRR8416081 \n",
+ "43 rumen_microbiome_of_beef_cattle_706 PRJNA448333 SRR8416086 \n",
+ "44 rumen_microbiome_of_beef_cattle_707 PRJNA448333 SRR8416084 \n",
+ "45 rumen_microbiome_of_beef_cattle_708 PRJNA448333 SRR8416101 \n",
+ "46 rumen_microbiome_of_beef_cattle_709 PRJNA448333 SRR8416085 \n",
+ "47 rumen_microbiome_of_beef_cattle_710 PRJNA448333 SRR8416080 \n",
+ "48 rumen_microbiome_of_beef_cattle_712 PRJNA448333 SRR8416079 \n",
+ " mgx_study_accession mgx_run_accession sample_type \n",
+ "1 PRJNA448333 SRR8397906 cattle_rumen\n",
+ "2 PRJNA448333 SRR8404214 cattle_rumen\n",
+ "3 PRJNA448333 SRR8397905 cattle_rumen\n",
+ "4 PRJNA448333 SRR8397904 cattle_rumen\n",
+ "5 PRJNA448333 SRR8397903 cattle_rumen\n",
+ "6 PRJNA448333 SRR8397910 cattle_rumen\n",
+ "7 PRJNA448333 SRR8397909 cattle_rumen\n",
+ "8 PRJNA448333 SRR8397908 cattle_rumen\n",
+ "9 PRJNA448333 SRR8397907 cattle_rumen\n",
+ "10 PRJNA448333 SRR8397902 cattle_rumen\n",
+ "11 PRJNA448333 SRR8397901 cattle_rumen\n",
+ "12 PRJNA448333 SRR8397884 cattle_rumen\n",
+ "13 PRJNA448333 SRR8397883 cattle_rumen\n",
+ "14 PRJNA448333 SRR8397886 cattle_rumen\n",
+ "15 PRJNA448333 SRR8397885 cattle_rumen\n",
+ "16 PRJNA448333 SRR8397888 cattle_rumen\n",
+ "17 PRJNA448333 SRR8397887 cattle_rumen\n",
+ "18 PRJNA448333 SRR8397890 cattle_rumen\n",
+ "19 PRJNA448333 SRR8397889 cattle_rumen\n",
+ "20 PRJNA448333 SRR8397882 cattle_rumen\n",
+ "21 PRJNA448333 SRR8397881 cattle_rumen\n",
+ "22 PRJNA448333 SRR8397917 cattle_rumen\n",
+ "23 PRJNA448333 SRR8397918 cattle_rumen\n",
+ "24 PRJNA448333 SRR8397919 cattle_rumen\n",
+ "25 PRJNA448333 SRR8397920 cattle_rumen\n",
+ "26 PRJNA448333 SRR8397913 cattle_rumen\n",
+ "27 PRJNA448333 SRR8397914 cattle_rumen\n",
+ "28 PRJNA448333 SRR8397915 cattle_rumen\n",
+ "29 PRJNA448333 SRR8397916 cattle_rumen\n",
+ "30 PRJNA448333 SRR8397911 cattle_rumen\n",
+ "31 PRJNA448333 SRR8397912 cattle_rumen\n",
+ "32 PRJNA448333 SRR8397894 cattle_rumen\n",
+ "33 PRJNA448333 SRR8397893 cattle_rumen\n",
+ "34 PRJNA448333 SRR8397892 cattle_rumen\n",
+ "35 PRJNA448333 SRR8397891 cattle_rumen\n",
+ "36 PRJNA448333 SRR8397898 cattle_rumen\n",
+ "37 PRJNA448333 SRR8397897 cattle_rumen\n",
+ "38 PRJNA448333 SRR8397896 cattle_rumen\n",
+ "39 PRJNA448333 SRR8397895 cattle_rumen\n",
+ "40 PRJNA448333 SRR8397900 cattle_rumen\n",
+ "41 PRJNA448333 SRR8397899 cattle_rumen\n",
+ "42 PRJNA448333 SRR8397926 cattle_rumen\n",
+ "43 PRJNA448333 SRR8397927 cattle_rumen\n",
+ "44 PRJNA448333 SRR8397924 cattle_rumen\n",
+ "45 PRJNA448333 SRR8397925 cattle_rumen\n",
+ "46 PRJNA448333 SRR8397922 cattle_rumen\n",
+ "47 PRJNA448333 SRR8397923 cattle_rumen\n",
+ "48 PRJNA448333 SRR8397921 cattle_rumen"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "prjna448333_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a6509917",
+ "metadata": {},
+ "source": [
+ "## PRJNA344005"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "543d0709",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna344005 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA344005&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ " show_col_types = F) %>%\n",
+ " mutate(sample_name = gsub(\"_[DR]NA*.\", \"\", library_name)) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "f44db485",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# sample names that appear in exactly two rows of prjna344005\n",
+ "prjna344005_pairs <- filter(\n",
+ "  count(prjna344005, sample_name),\n",
+ "  n == 2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "7883c2cd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# keep the rows of paired samples and only the columns used below\n",
+ "prjna344005 <- filter(prjna344005, sample_name %in% prjna344005_pairs$sample_name) %>%\n",
+ "  select(study_accession, run_accession, sample_name, library_strategy)\n",
+ "\n",
+ "# rows with library_strategy \"RNA-Seq\" form the mtx table\n",
+ "prjna344005_mtx <- filter(prjna344005, library_strategy == \"RNA-Seq\") %>%\n",
+ "  select(\n",
+ "    sample_name,\n",
+ "    mtx_study_accession = study_accession, mtx_run_accession = run_accession)\n",
+ "\n",
+ "# rows with library_strategy \"WGS\" form the mgx table\n",
+ "prjna344005_mgx <- filter(prjna344005, library_strategy == \"WGS\") %>%\n",
+ "  select(\n",
+ "    sample_name,\n",
+ "    mgx_study_accession = study_accession, mgx_run_accession = run_accession)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "f8e7fd2f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 2 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tHiTCE_2d | PRJNA344005 | SRR4308224 | PRJNA344005 | SRR4308227 | groundwater |
\n",
+ "\tHiTCEB12_2d | PRJNA344005 | SRR4308225 | PRJNA344005 | SRR4308226 | groundwater |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 2 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t HiTCE\\_2d & PRJNA344005 & SRR4308224 & PRJNA344005 & SRR4308227 & groundwater\\\\\n",
+ "\t HiTCEB12\\_2d & PRJNA344005 & SRR4308225 & PRJNA344005 & SRR4308226 & groundwater\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 2 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| HiTCE_2d | PRJNA344005 | SRR4308224 | PRJNA344005 | SRR4308227 | groundwater |\n",
+ "| HiTCEB12_2d | PRJNA344005 | SRR4308225 | PRJNA344005 | SRR4308226 | groundwater |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 HiTCE_2d PRJNA344005 SRR4308224 PRJNA344005 \n",
+ "2 HiTCEB12_2d PRJNA344005 SRR4308225 PRJNA344005 \n",
+ " mgx_run_accession sample_type\n",
+ "1 SRR4308227 groundwater\n",
+ "2 SRR4308226 groundwater"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# attach the mgx columns to each mtx row by sample_name, then label every row as groundwater\n",
+ "prjna344005_mtx_vs_mgx <- prjna344005_mtx %>%\n",
+ "  left_join(prjna344005_mgx, by = \"sample_name\") %>% mutate(sample_type = \"groundwater\")\n",
+ "prjna344005_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e0ae555e",
+ "metadata": {},
+ "source": [
+ "## PRJNA237345 (mtx) & PRJNA237344 (mgx)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "16d89c6e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna237345 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA237345&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ " show_col_types = F)\n",
+ "prjna237344 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA237344&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ " show_col_types = F)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "d9d8f133",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[1m\u001b[22mJoining, by = \"sample_name\"\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A grouped_df: 23 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tamazon_plume_2_0um_from_station_10 | PRJNA237345 | SRR1193190 | PRJNA237344 | SRR1205250 | river |
\n",
+ "\tamazon_plume_2_0um_from_station_2 | PRJNA237345 | SRR1193177 | PRJNA237344 | SRR1182511 | river |
\n",
+ "\tamazon_plume_2_0um_from_station_23 | PRJNA237345 | SRR1193237 | PRJNA237344 | SRR1202089 | river |
\n",
+ "\tamazon_plume_2_0um_from_station_27 | PRJNA237345 | SRR1193629 | PRJNA237344 | SRR1183643 | river |
\n",
+ "\tamazon_plume_2_0um_from_station_3 | PRJNA237345 | SRR1193226 | PRJNA237344 | SRR1199272 | river |
\n",
+ "\tamazon_plume_0_2_2_0um_from_station_10 | PRJNA237345 | SRR1186930 | PRJNA237344 | SRR1199271 | river |
\n",
+ "\tamazon_plume_0_2_2_0um_from_station_2 | PRJNA237345 | SRR1193205 | PRJNA237344 | SRR1182512 | river |
\n",
+ "\tamazon_plume_0_2_2_0um_from_station_23 | PRJNA237345 | SRR1193632 | PRJNA237344 | SRR1186214 | river |
\n",
+ "\tamazon_plume_0_2_2_0um_from_station_25 | PRJNA237345 | SRR1204579 | PRJNA237344 | SRR1202090 | river |
\n",
+ "\tamazon_plume_0_2_2_0um_from_station_27 | PRJNA237345 | SRR1193627 | PRJNA237344 | SRR1183650 | river |
\n",
+ "\tamazon_plume_0_2_2_0um_from_station_3 | PRJNA237345 | SRR1193215 | PRJNA237344 | SRR1185413 | river |
\n",
+ "\tamazon_river_0_2_2_0um_from_belem | PRJNA237345 | SRR1781804 | PRJNA237344 | SRR1790489 | river |
\n",
+ "\tamazon_river_0_2_2_0um_from_macapa_north_channel | PRJNA237345 | SRR1785209 | PRJNA237344 | SRR1786279 | river |
\n",
+ "\tamazon_river_0_2_2_0um_from_macapa_south_channel | PRJNA237345 | SRR1784299 | PRJNA237344 | SRR1787940 | river |
\n",
+ "\tamazon_river_0_2_2_0um_from_obidos | PRJNA237345 | SRR1781945 | PRJNA237344 | SRR1790676 | river |
\n",
+ "\tamazon_river_0_2_2_0um_from_tapajos_depth | PRJNA237345 | SRR1779221 | PRJNA237344 | SRR1792674 | river |
\n",
+ "\tamazon_river_0_2_2_0um_from_tapajos_surface | PRJNA237345 | SRR1777513 | PRJNA237344 | SRR1796116 | river |
\n",
+ "\tamazon_river_2_0_297um_from_belem | PRJNA237345 | SRR1781811 | PRJNA237344 | SRR1790644 | river |
\n",
+ "\tamazon_river_2_0_297um_from_macapa_north_channel | PRJNA237345 | SRR1785350 | PRJNA237344 | SRR1786281 | river |
\n",
+ "\tamazon_river_2_0_297um_from_macapa_south_channel | PRJNA237345 | SRR1784304 | PRJNA237344 | SRR1787943 | river |
\n",
+ "\tamazon_river_2_0_297um_from_obidos | PRJNA237345 | SRR1782579 | PRJNA237344 | SRR1790678 | river |
\n",
+ "\tamazon_river_2_0_297um_from_tapajos_depth | PRJNA237345 | SRR1781711 | PRJNA237344 | SRR1792852 | river |
\n",
+ "\tamazon_river_2_0_297um_from_tapajos_surface | PRJNA237345 | SRR1778024 | PRJNA237344 | SRR1796118 | river |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A grouped\\_df: 23 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t amazon\\_plume\\_2\\_0um\\_from\\_station\\_10 & PRJNA237345 & SRR1193190 & PRJNA237344 & SRR1205250 & river\\\\\n",
+ "\t amazon\\_plume\\_2\\_0um\\_from\\_station\\_2 & PRJNA237345 & SRR1193177 & PRJNA237344 & SRR1182511 & river\\\\\n",
+ "\t amazon\\_plume\\_2\\_0um\\_from\\_station\\_23 & PRJNA237345 & SRR1193237 & PRJNA237344 & SRR1202089 & river\\\\\n",
+ "\t amazon\\_plume\\_2\\_0um\\_from\\_station\\_27 & PRJNA237345 & SRR1193629 & PRJNA237344 & SRR1183643 & river\\\\\n",
+ "\t amazon\\_plume\\_2\\_0um\\_from\\_station\\_3 & PRJNA237345 & SRR1193226 & PRJNA237344 & SRR1199272 & river\\\\\n",
+ "\t amazon\\_plume\\_0\\_2\\_2\\_0um\\_from\\_station\\_10 & PRJNA237345 & SRR1186930 & PRJNA237344 & SRR1199271 & river\\\\\n",
+ "\t amazon\\_plume\\_0\\_2\\_2\\_0um\\_from\\_station\\_2 & PRJNA237345 & SRR1193205 & PRJNA237344 & SRR1182512 & river\\\\\n",
+ "\t amazon\\_plume\\_0\\_2\\_2\\_0um\\_from\\_station\\_23 & PRJNA237345 & SRR1193632 & PRJNA237344 & SRR1186214 & river\\\\\n",
+ "\t amazon\\_plume\\_0\\_2\\_2\\_0um\\_from\\_station\\_25 & PRJNA237345 & SRR1204579 & PRJNA237344 & SRR1202090 & river\\\\\n",
+ "\t amazon\\_plume\\_0\\_2\\_2\\_0um\\_from\\_station\\_27 & PRJNA237345 & SRR1193627 & PRJNA237344 & SRR1183650 & river\\\\\n",
+ "\t amazon\\_plume\\_0\\_2\\_2\\_0um\\_from\\_station\\_3 & PRJNA237345 & SRR1193215 & PRJNA237344 & SRR1185413 & river\\\\\n",
+ "\t amazon\\_river\\_0\\_2\\_2\\_0um\\_from\\_belem & PRJNA237345 & SRR1781804 & PRJNA237344 & SRR1790489 & river\\\\\n",
+ "\t amazon\\_river\\_0\\_2\\_2\\_0um\\_from\\_macapa\\_north\\_channel & PRJNA237345 & SRR1785209 & PRJNA237344 & SRR1786279 & river\\\\\n",
+ "\t amazon\\_river\\_0\\_2\\_2\\_0um\\_from\\_macapa\\_south\\_channel & PRJNA237345 & SRR1784299 & PRJNA237344 & SRR1787940 & river\\\\\n",
+ "\t amazon\\_river\\_0\\_2\\_2\\_0um\\_from\\_obidos & PRJNA237345 & SRR1781945 & PRJNA237344 & SRR1790676 & river\\\\\n",
+ "\t amazon\\_river\\_0\\_2\\_2\\_0um\\_from\\_tapajos\\_depth & PRJNA237345 & SRR1779221 & PRJNA237344 & SRR1792674 & river\\\\\n",
+ "\t amazon\\_river\\_0\\_2\\_2\\_0um\\_from\\_tapajos\\_surface & PRJNA237345 & SRR1777513 & PRJNA237344 & SRR1796116 & river\\\\\n",
+ "\t amazon\\_river\\_2\\_0\\_297um\\_from\\_belem & PRJNA237345 & SRR1781811 & PRJNA237344 & SRR1790644 & river\\\\\n",
+ "\t amazon\\_river\\_2\\_0\\_297um\\_from\\_macapa\\_north\\_channel & PRJNA237345 & SRR1785350 & PRJNA237344 & SRR1786281 & river\\\\\n",
+ "\t amazon\\_river\\_2\\_0\\_297um\\_from\\_macapa\\_south\\_channel & PRJNA237345 & SRR1784304 & PRJNA237344 & SRR1787943 & river\\\\\n",
+ "\t amazon\\_river\\_2\\_0\\_297um\\_from\\_obidos & PRJNA237345 & SRR1782579 & PRJNA237344 & SRR1790678 & river\\\\\n",
+ "\t amazon\\_river\\_2\\_0\\_297um\\_from\\_tapajos\\_depth & PRJNA237345 & SRR1781711 & PRJNA237344 & SRR1792852 & river\\\\\n",
+ "\t amazon\\_river\\_2\\_0\\_297um\\_from\\_tapajos\\_surface & PRJNA237345 & SRR1778024 & PRJNA237344 & SRR1796118 & river\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A grouped_df: 23 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| amazon_plume_2_0um_from_station_10 | PRJNA237345 | SRR1193190 | PRJNA237344 | SRR1205250 | river |\n",
+ "| amazon_plume_2_0um_from_station_2 | PRJNA237345 | SRR1193177 | PRJNA237344 | SRR1182511 | river |\n",
+ "| amazon_plume_2_0um_from_station_23 | PRJNA237345 | SRR1193237 | PRJNA237344 | SRR1202089 | river |\n",
+ "| amazon_plume_2_0um_from_station_27 | PRJNA237345 | SRR1193629 | PRJNA237344 | SRR1183643 | river |\n",
+ "| amazon_plume_2_0um_from_station_3 | PRJNA237345 | SRR1193226 | PRJNA237344 | SRR1199272 | river |\n",
+ "| amazon_plume_0_2_2_0um_from_station_10 | PRJNA237345 | SRR1186930 | PRJNA237344 | SRR1199271 | river |\n",
+ "| amazon_plume_0_2_2_0um_from_station_2 | PRJNA237345 | SRR1193205 | PRJNA237344 | SRR1182512 | river |\n",
+ "| amazon_plume_0_2_2_0um_from_station_23 | PRJNA237345 | SRR1193632 | PRJNA237344 | SRR1186214 | river |\n",
+ "| amazon_plume_0_2_2_0um_from_station_25 | PRJNA237345 | SRR1204579 | PRJNA237344 | SRR1202090 | river |\n",
+ "| amazon_plume_0_2_2_0um_from_station_27 | PRJNA237345 | SRR1193627 | PRJNA237344 | SRR1183650 | river |\n",
+ "| amazon_plume_0_2_2_0um_from_station_3 | PRJNA237345 | SRR1193215 | PRJNA237344 | SRR1185413 | river |\n",
+ "| amazon_river_0_2_2_0um_from_belem | PRJNA237345 | SRR1781804 | PRJNA237344 | SRR1790489 | river |\n",
+ "| amazon_river_0_2_2_0um_from_macapa_north_channel | PRJNA237345 | SRR1785209 | PRJNA237344 | SRR1786279 | river |\n",
+ "| amazon_river_0_2_2_0um_from_macapa_south_channel | PRJNA237345 | SRR1784299 | PRJNA237344 | SRR1787940 | river |\n",
+ "| amazon_river_0_2_2_0um_from_obidos | PRJNA237345 | SRR1781945 | PRJNA237344 | SRR1790676 | river |\n",
+ "| amazon_river_0_2_2_0um_from_tapajos_depth | PRJNA237345 | SRR1779221 | PRJNA237344 | SRR1792674 | river |\n",
+ "| amazon_river_0_2_2_0um_from_tapajos_surface | PRJNA237345 | SRR1777513 | PRJNA237344 | SRR1796116 | river |\n",
+ "| amazon_river_2_0_297um_from_belem | PRJNA237345 | SRR1781811 | PRJNA237344 | SRR1790644 | river |\n",
+ "| amazon_river_2_0_297um_from_macapa_north_channel | PRJNA237345 | SRR1785350 | PRJNA237344 | SRR1786281 | river |\n",
+ "| amazon_river_2_0_297um_from_macapa_south_channel | PRJNA237345 | SRR1784304 | PRJNA237344 | SRR1787943 | river |\n",
+ "| amazon_river_2_0_297um_from_obidos | PRJNA237345 | SRR1782579 | PRJNA237344 | SRR1790678 | river |\n",
+ "| amazon_river_2_0_297um_from_tapajos_depth | PRJNA237345 | SRR1781711 | PRJNA237344 | SRR1792852 | river |\n",
+ "| amazon_river_2_0_297um_from_tapajos_surface | PRJNA237345 | SRR1778024 | PRJNA237344 | SRR1796118 | river |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession\n",
+ "1 amazon_plume_2_0um_from_station_10 PRJNA237345 \n",
+ "2 amazon_plume_2_0um_from_station_2 PRJNA237345 \n",
+ "3 amazon_plume_2_0um_from_station_23 PRJNA237345 \n",
+ "4 amazon_plume_2_0um_from_station_27 PRJNA237345 \n",
+ "5 amazon_plume_2_0um_from_station_3 PRJNA237345 \n",
+ "6 amazon_plume_0_2_2_0um_from_station_10 PRJNA237345 \n",
+ "7 amazon_plume_0_2_2_0um_from_station_2 PRJNA237345 \n",
+ "8 amazon_plume_0_2_2_0um_from_station_23 PRJNA237345 \n",
+ "9 amazon_plume_0_2_2_0um_from_station_25 PRJNA237345 \n",
+ "10 amazon_plume_0_2_2_0um_from_station_27 PRJNA237345 \n",
+ "11 amazon_plume_0_2_2_0um_from_station_3 PRJNA237345 \n",
+ "12 amazon_river_0_2_2_0um_from_belem PRJNA237345 \n",
+ "13 amazon_river_0_2_2_0um_from_macapa_north_channel PRJNA237345 \n",
+ "14 amazon_river_0_2_2_0um_from_macapa_south_channel PRJNA237345 \n",
+ "15 amazon_river_0_2_2_0um_from_obidos PRJNA237345 \n",
+ "16 amazon_river_0_2_2_0um_from_tapajos_depth PRJNA237345 \n",
+ "17 amazon_river_0_2_2_0um_from_tapajos_surface PRJNA237345 \n",
+ "18 amazon_river_2_0_297um_from_belem PRJNA237345 \n",
+ "19 amazon_river_2_0_297um_from_macapa_north_channel PRJNA237345 \n",
+ "20 amazon_river_2_0_297um_from_macapa_south_channel PRJNA237345 \n",
+ "21 amazon_river_2_0_297um_from_obidos PRJNA237345 \n",
+ "22 amazon_river_2_0_297um_from_tapajos_depth PRJNA237345 \n",
+ "23 amazon_river_2_0_297um_from_tapajos_surface PRJNA237345 \n",
+ " mtx_run_accession mgx_study_accession mgx_run_accession sample_type\n",
+ "1 SRR1193190 PRJNA237344 SRR1205250 river \n",
+ "2 SRR1193177 PRJNA237344 SRR1182511 river \n",
+ "3 SRR1193237 PRJNA237344 SRR1202089 river \n",
+ "4 SRR1193629 PRJNA237344 SRR1183643 river \n",
+ "5 SRR1193226 PRJNA237344 SRR1199272 river \n",
+ "6 SRR1186930 PRJNA237344 SRR1199271 river \n",
+ "7 SRR1193205 PRJNA237344 SRR1182512 river \n",
+ "8 SRR1193632 PRJNA237344 SRR1186214 river \n",
+ "9 SRR1204579 PRJNA237344 SRR1202090 river \n",
+ "10 SRR1193627 PRJNA237344 SRR1183650 river \n",
+ "11 SRR1193215 PRJNA237344 SRR1185413 river \n",
+ "12 SRR1781804 PRJNA237344 SRR1790489 river \n",
+ "13 SRR1785209 PRJNA237344 SRR1786279 river \n",
+ "14 SRR1784299 PRJNA237344 SRR1787940 river \n",
+ "15 SRR1781945 PRJNA237344 SRR1790676 river \n",
+ "16 SRR1779221 PRJNA237344 SRR1792674 river \n",
+ "17 SRR1777513 PRJNA237344 SRR1796116 river \n",
+ "18 SRR1781811 PRJNA237344 SRR1790644 river \n",
+ "19 SRR1785350 PRJNA237344 SRR1786281 river \n",
+ "20 SRR1784304 PRJNA237344 SRR1787943 river \n",
+ "21 SRR1782579 PRJNA237344 SRR1790678 river \n",
+ "22 SRR1781711 PRJNA237344 SRR1792852 river \n",
+ "23 SRR1778024 PRJNA237344 SRR1796118 river "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "tmp1 <- prjna237345 %>% # mtx runs; sample_name is built from experiment_title\n",
+ "  select(sample_name = experiment_title, mtx_study_accession = study_accession, mtx_run_accession = run_accession) %>%\n",
+ "  mutate(sample_name = gsub(\"Metatranscriptome \", \"\", sample_name),\n",
+ "         sample_name = gsub(\"\\\\(.*\", \"\", sample_name)) # drop everything from the first \"(\" onward\n",
+ "\n",
+ "tmp2 <- prjna237344 %>% # mgx runs; same clean-up as tmp1 with the \"Metagenome \" prefix\n",
+ "  select(sample_name = experiment_title, mgx_study_accession = study_accession, mgx_run_accession = run_accession) %>%\n",
+ "  mutate(sample_name = gsub(\"Metagenome \", \"\", sample_name),\n",
+ "         sample_name = gsub(\"\\\\(.*\", \"\", sample_name))\n",
+ "\n",
+ "prjna237345_vs_prjna237344 <- left_join(tmp1, tmp2, by = \"sample_name\") %>% # sample_name is the only column the two tables share\n",
+ "  filter(!is.na(mgx_study_accession)) %>% # drop mtx runs that found no mgx partner\n",
+ "  mutate(sample_name = gsub(\"Illumina Genome Analyzer IIx sequencing; May-June 2010 \", \"\", sample_name),\n",
+ "         sample_name = gsub(\"Illumina HiSeq 2500 sequencing; May 2011 \", \"\", sample_name)) %>%\n",
+ "  group_by(sample_name) %>%\n",
+ "  slice(1) %>% # first row of each sample_name group; the result stays grouped\n",
+ "  mutate(sample_name = make_clean_names(sample_name),\n",
+ "         sample_type = \"river\")\n",
+ "\n",
+ "prjna237345_vs_prjna237344"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "49c6b061",
+ "metadata": {},
+ "source": [
+ "## PRJNA453733"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "f074e2aa",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna453733 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA453733&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ " show_col_types = F) %>%\n",
+ " filter(library_strategy == \"OTHER\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "947b707d",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 5 × 5\n",
+ "\n",
+ "\tstudy_accession | run_accession | sample_alias | library_strategy | library_source |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tPRJNA453733 | SRR7083928 | CS-Sed16-2cmA | OTHER | METATRANSCRIPTOMIC |
\n",
+ "\tPRJNA453733 | SRR7083929 | CS-Sed16-2cmB | OTHER | METAGENOMIC |
\n",
+ "\tPRJNA453733 | SRR7083930 | CS-Br16 | OTHER | METAGENOMIC |
\n",
+ "\tPRJNA453733 | SRR7083931 | CS-Sed16-2cmA | OTHER | METAGENOMIC |
\n",
+ "\tPRJNA453733 | SRR7083934 | CS-Sed16-5cm | OTHER | METAGENOMIC |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 5 × 5\n",
+ "\\begin{tabular}{lllll}\n",
+ " study\\_accession & run\\_accession & sample\\_alias & library\\_strategy & library\\_source\\\\\n",
+ " & & & & \\\\\n",
+ "\\hline\n",
+ "\t PRJNA453733 & SRR7083928 & CS-Sed16-2cmA & OTHER & METATRANSCRIPTOMIC\\\\\n",
+ "\t PRJNA453733 & SRR7083929 & CS-Sed16-2cmB & OTHER & METAGENOMIC \\\\\n",
+ "\t PRJNA453733 & SRR7083930 & CS-Br16 & OTHER & METAGENOMIC \\\\\n",
+ "\t PRJNA453733 & SRR7083931 & CS-Sed16-2cmA & OTHER & METAGENOMIC \\\\\n",
+ "\t PRJNA453733 & SRR7083934 & CS-Sed16-5cm & OTHER & METAGENOMIC \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 5 × 5\n",
+ "\n",
+ "| study_accession <chr> | run_accession <chr> | sample_alias <chr> | library_strategy <chr> | library_source <chr> |\n",
+ "|---|---|---|---|---|\n",
+ "| PRJNA453733 | SRR7083928 | CS-Sed16-2cmA | OTHER | METATRANSCRIPTOMIC |\n",
+ "| PRJNA453733 | SRR7083929 | CS-Sed16-2cmB | OTHER | METAGENOMIC |\n",
+ "| PRJNA453733 | SRR7083930 | CS-Br16 | OTHER | METAGENOMIC |\n",
+ "| PRJNA453733 | SRR7083931 | CS-Sed16-2cmA | OTHER | METAGENOMIC |\n",
+ "| PRJNA453733 | SRR7083934 | CS-Sed16-5cm | OTHER | METAGENOMIC |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " study_accession run_accession sample_alias library_strategy\n",
+ "1 PRJNA453733 SRR7083928 CS-Sed16-2cmA OTHER \n",
+ "2 PRJNA453733 SRR7083929 CS-Sed16-2cmB OTHER \n",
+ "3 PRJNA453733 SRR7083930 CS-Br16 OTHER \n",
+ "4 PRJNA453733 SRR7083931 CS-Sed16-2cmA OTHER \n",
+ "5 PRJNA453733 SRR7083934 CS-Sed16-5cm OTHER \n",
+ " library_source \n",
+ "1 METATRANSCRIPTOMIC\n",
+ "2 METAGENOMIC \n",
+ "3 METAGENOMIC \n",
+ "4 METAGENOMIC \n",
+ "5 METAGENOMIC "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "prjna453733 %>% \n",
+ " select(study_accession, run_accession, sample_alias, library_strategy, library_source)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "18b6e2a1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 1 × 2\n",
+ "\n",
+ "\tsample_alias | n |
\n",
+ "\t<chr> | <int> |
\n",
+ "\n",
+ "\n",
+ "\tCS-Sed16-2cmA | 2 |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 1 × 2\n",
+ "\\begin{tabular}{ll}\n",
+ " sample\\_alias & n\\\\\n",
+ " & \\\\\n",
+ "\\hline\n",
+ "\t CS-Sed16-2cmA & 2\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 1 × 2\n",
+ "\n",
+ "| sample_alias <chr> | n <int> |\n",
+ "|---|---|\n",
+ "| CS-Sed16-2cmA | 2 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_alias n\n",
+ "1 CS-Sed16-2cmA 2"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# sample aliases that appear in exactly two rows of prjna453733\n",
+ "prjna453733_pairs <- filter(\n",
+ "  count(prjna453733, sample_alias),\n",
+ "  n == 2)\n",
+ "\n",
+ "prjna453733_pairs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "9016f1ab",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 1 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tCS-Sed16-2cmA | PRJNA453733 | SRR7083928 | PRJNA453733 | SRR7083931 | lake |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 1 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t CS-Sed16-2cmA & PRJNA453733 & SRR7083928 & PRJNA453733 & SRR7083931 & lake\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 1 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| CS-Sed16-2cmA | PRJNA453733 | SRR7083928 | PRJNA453733 | SRR7083931 | lake |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 CS-Sed16-2cmA PRJNA453733 SRR7083928 PRJNA453733 \n",
+ " mgx_run_accession sample_type\n",
+ "1 SRR7083931 lake "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# rows of paired samples, split by library_source into the mtx and mgx tables\n",
+ "prjna453733_mtx <- prjna453733 %>%\n",
+ "  filter(sample_alias %in% prjna453733_pairs$sample_alias,\n",
+ "         library_source == \"METATRANSCRIPTOMIC\") %>%\n",
+ "  select(sample_name = sample_alias, mtx_study_accession = study_accession, mtx_run_accession = run_accession)\n",
+ "\n",
+ "prjna453733_mgx <- prjna453733 %>%\n",
+ "  filter(sample_alias %in% prjna453733_pairs$sample_alias,\n",
+ "         library_source == \"METAGENOMIC\") %>%\n",
+ "  select(sample_name = sample_alias, mgx_study_accession = study_accession, mgx_run_accession = run_accession)\n",
+ "\n",
+ "# attach the mgx columns to each mtx row by sample_name, then label every row as lake\n",
+ "prjna453733_mtx_vs_mgx <- prjna453733_mtx %>%\n",
+ "  left_join(prjna453733_mgx, by = \"sample_name\") %>%\n",
+ "  mutate(sample_type = \"lake\")\n",
+ "\n",
+ "prjna453733_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7e95696f",
+ "metadata": {},
+ "source": [
+ "## Microbial metagenomes and metatranscriptomes during a coastal phytoplankton bloom (one PRJNA per sample)\n",
+ "The samples published in the above title each have their own study accession and run accession. \n",
+ "This section of the notebook parses two supplementary files from the publication to get all of the sample bioproject numbers and to determine which bioproject numbers represent pairs of samples."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "eee996ab",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "download.file(url = \"https://static-content.springer.com/esm/art%3A10.1038%2Fs41597-019-0132-4/MediaObjects/41597_2019_132_MOESM1_ESM.zip\",\n",
+ " destfile = \"inputs/metadata/41597_2019_132_MOESM1_ESM.zip\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "89777aca",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "unzip(\"inputs/metadata/41597_2019_132_MOESM1_ESM.zip\", exdir = \"inputs/metadata/41597_2019_132_MOESM1_ESM\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "102b69b1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# mgx assay table: assay_name is kept as mgx_study_accession; every \"D\" is removed from sample_name\n",
+ "moran010B_mgx <- read_tsv(\"inputs/metadata/41597_2019_132_MOESM1_ESM/a_Moran010B_dna.txt\", show_col_types = FALSE) %>%\n",
+ "  clean_names() %>%\n",
+ "  select(sample_name, mgx_study_accession = assay_name)\n",
+ "moran010B_mgx$sample_name <- gsub(\"D\", \"\", moran010B_mgx$sample_name)\n",
+ "# mtx assay table: assay_name is kept as mtx_study_accession; every \"R\" is removed from sample_name\n",
+ "moran010B_mtx <- read_tsv(\"inputs/metadata/41597_2019_132_MOESM1_ESM/a_Moran010B_rna.txt\", show_col_types = FALSE) %>%\n",
+ "  clean_names() %>%\n",
+ "  select(sample_name, mtx_study_accession = assay_name)\n",
+ "moran010B_mtx$sample_name <- gsub(\"R\", \"\", moran010B_mtx$sample_name)\n",
+ "moran010B <- inner_join(moran010B_mtx, moran010B_mgx, by = \"sample_name\") # keep sample names present in both tables"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "bbc1cf54",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# use the moran010B study accessions to programmatically generate URLs to get run accessions for each sample\n",
+ "moran010B_run_accessions <- data.frame()\n",
+ "for(study_accession in unique(c(moran010B$mtx_study_accession, moran010B$mgx_study_accession))){ # paired samples only, each study requested once\n",
+ "  url <- paste0(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=\", study_accession, \"&result=read_run&fields=study_accession,run_accession&format=tsv&download=true&limit=0\")\n",
+ "  run_accession <- read_tsv(url, show_col_types = FALSE) # one ENA file report (study_accession, run_accession) per study\n",
+ "  moran010B_run_accessions <- bind_rows(moran010B_run_accessions, run_accession)\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "5207828e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 74 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\t26 | PRJNA502453 | SRR8361532 | PRJNA467728 | SRR7592711 | ocean |
\n",
+ "\t58_r | PRJNA502454 | SRR8361534 | PRJNA467772 | SRR7609573 | ocean |
\n",
+ "\t125_r | PRJNA468332 | SRR7962480 | PRJNA467773 | SRR7609569 | ocean |
\n",
+ "\t8 | PRJNA502455 | SRR8297879 | PRJNA467724 | SRR7592287 | ocean |
\n",
+ "\t85 | PRJNA502456 | SRR8297845 | PRJNA467765 | SRR7609362 | ocean |
\n",
+ "\t76 | PRJNA468305 | SRR7949679 | PRJNA467757 | SRR7608731 | ocean |
\n",
+ "\t40 | PRJNA467774 | SRR7609608 | PRJNA467736 | SRR7595425 | ocean |
\n",
+ "\t53 | PRJNA468299 | SRR7609574 | PRJNA502421 | SRR8361352 | ocean |
\n",
+ "\t73 | PRJNA468306 | SRR7949683 | PRJNA467754 | SRR7608223 | ocean |
\n",
+ "\t23 | PRJNA467775 | SRR7609632 | PRJNA467727 | SRR7633009 | ocean |
\n",
+ "\t49 | PRJNA467776 | SRR7609653 | PRJNA502442 | SRR8361104 | ocean |
\n",
+ "\t14 | PRJNA468308 | SRR7949682 | PRJNA502410 | SRR8360542 | ocean |
\n",
+ "\t44 | PRJNA502457 | SRR8361579 | PRJNA467738 | SRR7600393 | ocean |
\n",
+ "\t29 | PRJNA468309 | SRR7949681 | PRJNA467730 | SRR7593831 | ocean |
\n",
+ "\t18 | PRJNA467777 | SRR7609655 | PRJNA502413 | SRR8360544 | ocean |
\n",
+ "\t24 | PRJNA502458 | SRR8361603 | PRJNA502440 | SRR8360549 | ocean |
\n",
+ "\t79 | PRJNA468143 | SRR7609654 | PRJNA467760 | SRR7609143 | ocean |
\n",
+ "\t22 | PRJNA468300 | SRR7609599 | PRJNA502414 | SRR8360545 | ocean |
\n",
+ "\t21 | PRJNA468310 | SRR7949678 | PRJNA468208 | SRR7592488 | ocean |
\n",
+ "\t60 | PRJNA467778 | SRR7609828 | PRJNA502425 | SRR8361517 | ocean |
\n",
+ "\t15 | PRJNA468144 | SRR7609827 | PRJNA502411 | SRR8360457 | ocean |
\n",
+ "\t75 | PRJNA502459 | SRR8361602 | PRJNA467756 | SRR7608423 | ocean |
\n",
+ "\t3 | PRJNA468311 | SRR7949692 | PRJNA467722 | SRR7592226 | ocean |
\n",
+ "\t20 | PRJNA467779 | SRR7609829 | PRJNA467726 | SRR7592489 | ocean |
\n",
+ "\t91 | PRJNA468312 | SRR7949677 | PRJNA467769 | SRR7609461 | ocean |
\n",
+ "\t56 | PRJNA468152 | SRR7615220 | PRJNA467743 | SRR7601261 | ocean |
\n",
+ "\t46 | PRJNA502460 | SRR8297843 | PRJNA468213 | SRR7600395 | ocean |
\n",
+ "\t2 | PRJNA468313 | SRR7609607 | PRJNA467721 | SRR7592225 | ocean |
\n",
+ "\t38 | PRJNA468301 | SRR7609600 | PRJNA468211 | SRR7594461 | ocean |
\n",
+ "\t45 | PRJNA468314 | SRR7962234 | PRJNA467739 | SRR7600394 | ocean |
\n",
+ "\t⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
\n",
+ "\t32 | PRJNA468321 | SRR7962267 | PRJNA468210 | SRR7594064 | ocean |
\n",
+ "\t81 | PRJNA468145 | SRR7610454 | PRJNA467762 | SRR7609163 | ocean |
\n",
+ "\t70 | PRJNA468322 | SRR7962239 | PRJNA467752 | SRR7608010 | ocean |
\n",
+ "\t67 | PRJNA468323 | SRR7962329 | PRJNA467750 | SRR7607757 | ocean |
\n",
+ "\t33 | PRJNA467783 | SRR7610453 | PRJNA502417 | SRR8361102 | ocean |
\n",
+ "\t27 | PRJNA468331 | SRR7962479 | PRJNA468209 | SRR7593830 | ocean |
\n",
+ "\t34 | PRJNA502465 | SRR8297868 | PRJNA467732 | SRR7594085 | ocean |
\n",
+ "\t4 | PRJNA468146 | SRR7610452 | PRJNA467723 | SRR7592236 | ocean |
\n",
+ "\t57 | PRJNA468147 | SRR7610935 | PRJNA502423 | SRR8361526 | ocean |
\n",
+ "\t1 | PRJNA502466 | SRR8361608 | PRJNA467720 | SRR7592212 | ocean |
\n",
+ "\t47 | PRJNA468148 | SRR7610853 | PRJNA467740 | SRR7600454 | ocean |
\n",
+ "\t9 | PRJNA502467 | SRR8297869 | PRJNA502408 | SRR8360547 | ocean |
\n",
+ "\t77 | PRJNA468324 | SRR7962474 | PRJNA467758 | SRR7608832 | ocean |
\n",
+ "\t55 | PRJNA467784 | SRR7610867 | PRJNA502422 | SRR8361371 | ocean |
\n",
+ "\t10 | PRJNA468325 | SRR7962473 | PRJNA502409 | SRR8360543 | ocean |
\n",
+ "\t11 | PRJNA468326 | SRR7962475 | PRJNA467725 | SRR7592321 | ocean |
\n",
+ "\t58 | PRJNA502468 | SRR8297878 | PRJNA502424 | SRR8361527 | ocean |
\n",
+ "\t52 | PRJNA468327 | SRR7962478 | PRJNA467742 | SRR7600455 | ocean |
\n",
+ "\t51 | PRJNA468149 | SRR7611057 | PRJNA502420 | SRR8361350 | ocean |
\n",
+ "\t50 | PRJNA468328 | SRR7962477 | PRJNA502419 | SRR8361349 | ocean |
\n",
+ "\t90 | PRJNA467785 | SRR7611056 | PRJNA467768 | SRR7609452 | ocean |
\n",
+ "\t80 | PRJNA467786 | SRR7611058 | PRJNA467761 | SRR7609166 | ocean |
\n",
+ "\t89 | PRJNA468150 | SRR7611500 | PRJNA467766 | SRR7609363 | ocean |
\n",
+ "\t35 | PRJNA467787 | SRR7612580 | PRJNA502441 | SRR8361101 | ocean |
\n",
+ "\t71 | PRJNA468329 | SRR7962476 | PRJNA467753 | SRR7608017 | ocean |
\n",
+ "\t31 | PRJNA468151 | SRR7611499 | PRJNA467731 | SRR7593934 | ocean |
\n",
+ "\t84 | PRJNA502451 | SRR8393198 | PRJNA467764 | SRR7609361 | ocean |
\n",
+ "\t61 | PRJNA467788 | SRR7615219 | PRJNA502426 | SRR8361516 | ocean |
\n",
+ "\t36_r | PRJNA468330 | SRR7962481 | PRJNA467770 | SRR7609462 | ocean |
\n",
+ "\t82 | PRJNA502452 | SRR8361531 | PRJNA467763 | SRR7609344 | ocean |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 74 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t 26 & PRJNA502453 & SRR8361532 & PRJNA467728 & SRR7592711 & ocean\\\\\n",
+ "\t 58\\_r & PRJNA502454 & SRR8361534 & PRJNA467772 & SRR7609573 & ocean\\\\\n",
+ "\t 125\\_r & PRJNA468332 & SRR7962480 & PRJNA467773 & SRR7609569 & ocean\\\\\n",
+ "\t 8 & PRJNA502455 & SRR8297879 & PRJNA467724 & SRR7592287 & ocean\\\\\n",
+ "\t 85 & PRJNA502456 & SRR8297845 & PRJNA467765 & SRR7609362 & ocean\\\\\n",
+ "\t 76 & PRJNA468305 & SRR7949679 & PRJNA467757 & SRR7608731 & ocean\\\\\n",
+ "\t 40 & PRJNA467774 & SRR7609608 & PRJNA467736 & SRR7595425 & ocean\\\\\n",
+ "\t 53 & PRJNA468299 & SRR7609574 & PRJNA502421 & SRR8361352 & ocean\\\\\n",
+ "\t 73 & PRJNA468306 & SRR7949683 & PRJNA467754 & SRR7608223 & ocean\\\\\n",
+ "\t 23 & PRJNA467775 & SRR7609632 & PRJNA467727 & SRR7633009 & ocean\\\\\n",
+ "\t 49 & PRJNA467776 & SRR7609653 & PRJNA502442 & SRR8361104 & ocean\\\\\n",
+ "\t 14 & PRJNA468308 & SRR7949682 & PRJNA502410 & SRR8360542 & ocean\\\\\n",
+ "\t 44 & PRJNA502457 & SRR8361579 & PRJNA467738 & SRR7600393 & ocean\\\\\n",
+ "\t 29 & PRJNA468309 & SRR7949681 & PRJNA467730 & SRR7593831 & ocean\\\\\n",
+ "\t 18 & PRJNA467777 & SRR7609655 & PRJNA502413 & SRR8360544 & ocean\\\\\n",
+ "\t 24 & PRJNA502458 & SRR8361603 & PRJNA502440 & SRR8360549 & ocean\\\\\n",
+ "\t 79 & PRJNA468143 & SRR7609654 & PRJNA467760 & SRR7609143 & ocean\\\\\n",
+ "\t 22 & PRJNA468300 & SRR7609599 & PRJNA502414 & SRR8360545 & ocean\\\\\n",
+ "\t 21 & PRJNA468310 & SRR7949678 & PRJNA468208 & SRR7592488 & ocean\\\\\n",
+ "\t 60 & PRJNA467778 & SRR7609828 & PRJNA502425 & SRR8361517 & ocean\\\\\n",
+ "\t 15 & PRJNA468144 & SRR7609827 & PRJNA502411 & SRR8360457 & ocean\\\\\n",
+ "\t 75 & PRJNA502459 & SRR8361602 & PRJNA467756 & SRR7608423 & ocean\\\\\n",
+ "\t 3 & PRJNA468311 & SRR7949692 & PRJNA467722 & SRR7592226 & ocean\\\\\n",
+ "\t 20 & PRJNA467779 & SRR7609829 & PRJNA467726 & SRR7592489 & ocean\\\\\n",
+ "\t 91 & PRJNA468312 & SRR7949677 & PRJNA467769 & SRR7609461 & ocean\\\\\n",
+ "\t 56 & PRJNA468152 & SRR7615220 & PRJNA467743 & SRR7601261 & ocean\\\\\n",
+ "\t 46 & PRJNA502460 & SRR8297843 & PRJNA468213 & SRR7600395 & ocean\\\\\n",
+ "\t 2 & PRJNA468313 & SRR7609607 & PRJNA467721 & SRR7592225 & ocean\\\\\n",
+ "\t 38 & PRJNA468301 & SRR7609600 & PRJNA468211 & SRR7594461 & ocean\\\\\n",
+ "\t 45 & PRJNA468314 & SRR7962234 & PRJNA467739 & SRR7600394 & ocean\\\\\n",
+ "\t ⋮ & ⋮ & ⋮ & ⋮ & ⋮ & ⋮\\\\\n",
+ "\t 32 & PRJNA468321 & SRR7962267 & PRJNA468210 & SRR7594064 & ocean\\\\\n",
+ "\t 81 & PRJNA468145 & SRR7610454 & PRJNA467762 & SRR7609163 & ocean\\\\\n",
+ "\t 70 & PRJNA468322 & SRR7962239 & PRJNA467752 & SRR7608010 & ocean\\\\\n",
+ "\t 67 & PRJNA468323 & SRR7962329 & PRJNA467750 & SRR7607757 & ocean\\\\\n",
+ "\t 33 & PRJNA467783 & SRR7610453 & PRJNA502417 & SRR8361102 & ocean\\\\\n",
+ "\t 27 & PRJNA468331 & SRR7962479 & PRJNA468209 & SRR7593830 & ocean\\\\\n",
+ "\t 34 & PRJNA502465 & SRR8297868 & PRJNA467732 & SRR7594085 & ocean\\\\\n",
+ "\t 4 & PRJNA468146 & SRR7610452 & PRJNA467723 & SRR7592236 & ocean\\\\\n",
+ "\t 57 & PRJNA468147 & SRR7610935 & PRJNA502423 & SRR8361526 & ocean\\\\\n",
+ "\t 1 & PRJNA502466 & SRR8361608 & PRJNA467720 & SRR7592212 & ocean\\\\\n",
+ "\t 47 & PRJNA468148 & SRR7610853 & PRJNA467740 & SRR7600454 & ocean\\\\\n",
+ "\t 9 & PRJNA502467 & SRR8297869 & PRJNA502408 & SRR8360547 & ocean\\\\\n",
+ "\t 77 & PRJNA468324 & SRR7962474 & PRJNA467758 & SRR7608832 & ocean\\\\\n",
+ "\t 55 & PRJNA467784 & SRR7610867 & PRJNA502422 & SRR8361371 & ocean\\\\\n",
+ "\t 10 & PRJNA468325 & SRR7962473 & PRJNA502409 & SRR8360543 & ocean\\\\\n",
+ "\t 11 & PRJNA468326 & SRR7962475 & PRJNA467725 & SRR7592321 & ocean\\\\\n",
+ "\t 58 & PRJNA502468 & SRR8297878 & PRJNA502424 & SRR8361527 & ocean\\\\\n",
+ "\t 52 & PRJNA468327 & SRR7962478 & PRJNA467742 & SRR7600455 & ocean\\\\\n",
+ "\t 51 & PRJNA468149 & SRR7611057 & PRJNA502420 & SRR8361350 & ocean\\\\\n",
+ "\t 50 & PRJNA468328 & SRR7962477 & PRJNA502419 & SRR8361349 & ocean\\\\\n",
+ "\t 90 & PRJNA467785 & SRR7611056 & PRJNA467768 & SRR7609452 & ocean\\\\\n",
+ "\t 80 & PRJNA467786 & SRR7611058 & PRJNA467761 & SRR7609166 & ocean\\\\\n",
+ "\t 89 & PRJNA468150 & SRR7611500 & PRJNA467766 & SRR7609363 & ocean\\\\\n",
+ "\t 35 & PRJNA467787 & SRR7612580 & PRJNA502441 & SRR8361101 & ocean\\\\\n",
+ "\t 71 & PRJNA468329 & SRR7962476 & PRJNA467753 & SRR7608017 & ocean\\\\\n",
+ "\t 31 & PRJNA468151 & SRR7611499 & PRJNA467731 & SRR7593934 & ocean\\\\\n",
+ "\t 84 & PRJNA502451 & SRR8393198 & PRJNA467764 & SRR7609361 & ocean\\\\\n",
+ "\t 61 & PRJNA467788 & SRR7615219 & PRJNA502426 & SRR8361516 & ocean\\\\\n",
+ "\t 36\\_r & PRJNA468330 & SRR7962481 & PRJNA467770 & SRR7609462 & ocean\\\\\n",
+ "\t 82 & PRJNA502452 & SRR8361531 & PRJNA467763 & SRR7609344 & ocean\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 74 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| 26 | PRJNA502453 | SRR8361532 | PRJNA467728 | SRR7592711 | ocean |\n",
+ "| 58_r | PRJNA502454 | SRR8361534 | PRJNA467772 | SRR7609573 | ocean |\n",
+ "| 125_r | PRJNA468332 | SRR7962480 | PRJNA467773 | SRR7609569 | ocean |\n",
+ "| 8 | PRJNA502455 | SRR8297879 | PRJNA467724 | SRR7592287 | ocean |\n",
+ "| 85 | PRJNA502456 | SRR8297845 | PRJNA467765 | SRR7609362 | ocean |\n",
+ "| 76 | PRJNA468305 | SRR7949679 | PRJNA467757 | SRR7608731 | ocean |\n",
+ "| 40 | PRJNA467774 | SRR7609608 | PRJNA467736 | SRR7595425 | ocean |\n",
+ "| 53 | PRJNA468299 | SRR7609574 | PRJNA502421 | SRR8361352 | ocean |\n",
+ "| 73 | PRJNA468306 | SRR7949683 | PRJNA467754 | SRR7608223 | ocean |\n",
+ "| 23 | PRJNA467775 | SRR7609632 | PRJNA467727 | SRR7633009 | ocean |\n",
+ "| 49 | PRJNA467776 | SRR7609653 | PRJNA502442 | SRR8361104 | ocean |\n",
+ "| 14 | PRJNA468308 | SRR7949682 | PRJNA502410 | SRR8360542 | ocean |\n",
+ "| 44 | PRJNA502457 | SRR8361579 | PRJNA467738 | SRR7600393 | ocean |\n",
+ "| 29 | PRJNA468309 | SRR7949681 | PRJNA467730 | SRR7593831 | ocean |\n",
+ "| 18 | PRJNA467777 | SRR7609655 | PRJNA502413 | SRR8360544 | ocean |\n",
+ "| 24 | PRJNA502458 | SRR8361603 | PRJNA502440 | SRR8360549 | ocean |\n",
+ "| 79 | PRJNA468143 | SRR7609654 | PRJNA467760 | SRR7609143 | ocean |\n",
+ "| 22 | PRJNA468300 | SRR7609599 | PRJNA502414 | SRR8360545 | ocean |\n",
+ "| 21 | PRJNA468310 | SRR7949678 | PRJNA468208 | SRR7592488 | ocean |\n",
+ "| 60 | PRJNA467778 | SRR7609828 | PRJNA502425 | SRR8361517 | ocean |\n",
+ "| 15 | PRJNA468144 | SRR7609827 | PRJNA502411 | SRR8360457 | ocean |\n",
+ "| 75 | PRJNA502459 | SRR8361602 | PRJNA467756 | SRR7608423 | ocean |\n",
+ "| 3 | PRJNA468311 | SRR7949692 | PRJNA467722 | SRR7592226 | ocean |\n",
+ "| 20 | PRJNA467779 | SRR7609829 | PRJNA467726 | SRR7592489 | ocean |\n",
+ "| 91 | PRJNA468312 | SRR7949677 | PRJNA467769 | SRR7609461 | ocean |\n",
+ "| 56 | PRJNA468152 | SRR7615220 | PRJNA467743 | SRR7601261 | ocean |\n",
+ "| 46 | PRJNA502460 | SRR8297843 | PRJNA468213 | SRR7600395 | ocean |\n",
+ "| 2 | PRJNA468313 | SRR7609607 | PRJNA467721 | SRR7592225 | ocean |\n",
+ "| 38 | PRJNA468301 | SRR7609600 | PRJNA468211 | SRR7594461 | ocean |\n",
+ "| 45 | PRJNA468314 | SRR7962234 | PRJNA467739 | SRR7600394 | ocean |\n",
+ "| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |\n",
+ "| 32 | PRJNA468321 | SRR7962267 | PRJNA468210 | SRR7594064 | ocean |\n",
+ "| 81 | PRJNA468145 | SRR7610454 | PRJNA467762 | SRR7609163 | ocean |\n",
+ "| 70 | PRJNA468322 | SRR7962239 | PRJNA467752 | SRR7608010 | ocean |\n",
+ "| 67 | PRJNA468323 | SRR7962329 | PRJNA467750 | SRR7607757 | ocean |\n",
+ "| 33 | PRJNA467783 | SRR7610453 | PRJNA502417 | SRR8361102 | ocean |\n",
+ "| 27 | PRJNA468331 | SRR7962479 | PRJNA468209 | SRR7593830 | ocean |\n",
+ "| 34 | PRJNA502465 | SRR8297868 | PRJNA467732 | SRR7594085 | ocean |\n",
+ "| 4 | PRJNA468146 | SRR7610452 | PRJNA467723 | SRR7592236 | ocean |\n",
+ "| 57 | PRJNA468147 | SRR7610935 | PRJNA502423 | SRR8361526 | ocean |\n",
+ "| 1 | PRJNA502466 | SRR8361608 | PRJNA467720 | SRR7592212 | ocean |\n",
+ "| 47 | PRJNA468148 | SRR7610853 | PRJNA467740 | SRR7600454 | ocean |\n",
+ "| 9 | PRJNA502467 | SRR8297869 | PRJNA502408 | SRR8360547 | ocean |\n",
+ "| 77 | PRJNA468324 | SRR7962474 | PRJNA467758 | SRR7608832 | ocean |\n",
+ "| 55 | PRJNA467784 | SRR7610867 | PRJNA502422 | SRR8361371 | ocean |\n",
+ "| 10 | PRJNA468325 | SRR7962473 | PRJNA502409 | SRR8360543 | ocean |\n",
+ "| 11 | PRJNA468326 | SRR7962475 | PRJNA467725 | SRR7592321 | ocean |\n",
+ "| 58 | PRJNA502468 | SRR8297878 | PRJNA502424 | SRR8361527 | ocean |\n",
+ "| 52 | PRJNA468327 | SRR7962478 | PRJNA467742 | SRR7600455 | ocean |\n",
+ "| 51 | PRJNA468149 | SRR7611057 | PRJNA502420 | SRR8361350 | ocean |\n",
+ "| 50 | PRJNA468328 | SRR7962477 | PRJNA502419 | SRR8361349 | ocean |\n",
+ "| 90 | PRJNA467785 | SRR7611056 | PRJNA467768 | SRR7609452 | ocean |\n",
+ "| 80 | PRJNA467786 | SRR7611058 | PRJNA467761 | SRR7609166 | ocean |\n",
+ "| 89 | PRJNA468150 | SRR7611500 | PRJNA467766 | SRR7609363 | ocean |\n",
+ "| 35 | PRJNA467787 | SRR7612580 | PRJNA502441 | SRR8361101 | ocean |\n",
+ "| 71 | PRJNA468329 | SRR7962476 | PRJNA467753 | SRR7608017 | ocean |\n",
+ "| 31 | PRJNA468151 | SRR7611499 | PRJNA467731 | SRR7593934 | ocean |\n",
+ "| 84 | PRJNA502451 | SRR8393198 | PRJNA467764 | SRR7609361 | ocean |\n",
+ "| 61 | PRJNA467788 | SRR7615219 | PRJNA502426 | SRR8361516 | ocean |\n",
+ "| 36_r | PRJNA468330 | SRR7962481 | PRJNA467770 | SRR7609462 | ocean |\n",
+ "| 82 | PRJNA502452 | SRR8361531 | PRJNA467763 | SRR7609344 | ocean |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 26 PRJNA502453 SRR8361532 PRJNA467728 \n",
+ "2 58_r PRJNA502454 SRR8361534 PRJNA467772 \n",
+ "3 125_r PRJNA468332 SRR7962480 PRJNA467773 \n",
+ "4 8 PRJNA502455 SRR8297879 PRJNA467724 \n",
+ "5 85 PRJNA502456 SRR8297845 PRJNA467765 \n",
+ "6 76 PRJNA468305 SRR7949679 PRJNA467757 \n",
+ "7 40 PRJNA467774 SRR7609608 PRJNA467736 \n",
+ "8 53 PRJNA468299 SRR7609574 PRJNA502421 \n",
+ "9 73 PRJNA468306 SRR7949683 PRJNA467754 \n",
+ "10 23 PRJNA467775 SRR7609632 PRJNA467727 \n",
+ "11 49 PRJNA467776 SRR7609653 PRJNA502442 \n",
+ "12 14 PRJNA468308 SRR7949682 PRJNA502410 \n",
+ "13 44 PRJNA502457 SRR8361579 PRJNA467738 \n",
+ "14 29 PRJNA468309 SRR7949681 PRJNA467730 \n",
+ "15 18 PRJNA467777 SRR7609655 PRJNA502413 \n",
+ "16 24 PRJNA502458 SRR8361603 PRJNA502440 \n",
+ "17 79 PRJNA468143 SRR7609654 PRJNA467760 \n",
+ "18 22 PRJNA468300 SRR7609599 PRJNA502414 \n",
+ "19 21 PRJNA468310 SRR7949678 PRJNA468208 \n",
+ "20 60 PRJNA467778 SRR7609828 PRJNA502425 \n",
+ "21 15 PRJNA468144 SRR7609827 PRJNA502411 \n",
+ "22 75 PRJNA502459 SRR8361602 PRJNA467756 \n",
+ "23 3 PRJNA468311 SRR7949692 PRJNA467722 \n",
+ "24 20 PRJNA467779 SRR7609829 PRJNA467726 \n",
+ "25 91 PRJNA468312 SRR7949677 PRJNA467769 \n",
+ "26 56 PRJNA468152 SRR7615220 PRJNA467743 \n",
+ "27 46 PRJNA502460 SRR8297843 PRJNA468213 \n",
+ "28 2 PRJNA468313 SRR7609607 PRJNA467721 \n",
+ "29 38 PRJNA468301 SRR7609600 PRJNA468211 \n",
+ "30 45 PRJNA468314 SRR7962234 PRJNA467739 \n",
+ "⋮ ⋮ ⋮ ⋮ ⋮ \n",
+ "45 32 PRJNA468321 SRR7962267 PRJNA468210 \n",
+ "46 81 PRJNA468145 SRR7610454 PRJNA467762 \n",
+ "47 70 PRJNA468322 SRR7962239 PRJNA467752 \n",
+ "48 67 PRJNA468323 SRR7962329 PRJNA467750 \n",
+ "49 33 PRJNA467783 SRR7610453 PRJNA502417 \n",
+ "50 27 PRJNA468331 SRR7962479 PRJNA468209 \n",
+ "51 34 PRJNA502465 SRR8297868 PRJNA467732 \n",
+ "52 4 PRJNA468146 SRR7610452 PRJNA467723 \n",
+ "53 57 PRJNA468147 SRR7610935 PRJNA502423 \n",
+ "54 1 PRJNA502466 SRR8361608 PRJNA467720 \n",
+ "55 47 PRJNA468148 SRR7610853 PRJNA467740 \n",
+ "56 9 PRJNA502467 SRR8297869 PRJNA502408 \n",
+ "57 77 PRJNA468324 SRR7962474 PRJNA467758 \n",
+ "58 55 PRJNA467784 SRR7610867 PRJNA502422 \n",
+ "59 10 PRJNA468325 SRR7962473 PRJNA502409 \n",
+ "60 11 PRJNA468326 SRR7962475 PRJNA467725 \n",
+ "61 58 PRJNA502468 SRR8297878 PRJNA502424 \n",
+ "62 52 PRJNA468327 SRR7962478 PRJNA467742 \n",
+ "63 51 PRJNA468149 SRR7611057 PRJNA502420 \n",
+ "64 50 PRJNA468328 SRR7962477 PRJNA502419 \n",
+ "65 90 PRJNA467785 SRR7611056 PRJNA467768 \n",
+ "66 80 PRJNA467786 SRR7611058 PRJNA467761 \n",
+ "67 89 PRJNA468150 SRR7611500 PRJNA467766 \n",
+ "68 35 PRJNA467787 SRR7612580 PRJNA502441 \n",
+ "69 71 PRJNA468329 SRR7962476 PRJNA467753 \n",
+ "70 31 PRJNA468151 SRR7611499 PRJNA467731 \n",
+ "71 84 PRJNA502451 SRR8393198 PRJNA467764 \n",
+ "72 61 PRJNA467788 SRR7615219 PRJNA502426 \n",
+ "73 36_r PRJNA468330 SRR7962481 PRJNA467770 \n",
+ "74 82 PRJNA502452 SRR8361531 PRJNA467763 \n",
+ " mgx_run_accession sample_type\n",
+ "1 SRR7592711 ocean \n",
+ "2 SRR7609573 ocean \n",
+ "3 SRR7609569 ocean \n",
+ "4 SRR7592287 ocean \n",
+ "5 SRR7609362 ocean \n",
+ "6 SRR7608731 ocean \n",
+ "7 SRR7595425 ocean \n",
+ "8 SRR8361352 ocean \n",
+ "9 SRR7608223 ocean \n",
+ "10 SRR7633009 ocean \n",
+ "11 SRR8361104 ocean \n",
+ "12 SRR8360542 ocean \n",
+ "13 SRR7600393 ocean \n",
+ "14 SRR7593831 ocean \n",
+ "15 SRR8360544 ocean \n",
+ "16 SRR8360549 ocean \n",
+ "17 SRR7609143 ocean \n",
+ "18 SRR8360545 ocean \n",
+ "19 SRR7592488 ocean \n",
+ "20 SRR8361517 ocean \n",
+ "21 SRR8360457 ocean \n",
+ "22 SRR7608423 ocean \n",
+ "23 SRR7592226 ocean \n",
+ "24 SRR7592489 ocean \n",
+ "25 SRR7609461 ocean \n",
+ "26 SRR7601261 ocean \n",
+ "27 SRR7600395 ocean \n",
+ "28 SRR7592225 ocean \n",
+ "29 SRR7594461 ocean \n",
+ "30 SRR7600394 ocean \n",
+ "⋮ ⋮ ⋮ \n",
+ "45 SRR7594064 ocean \n",
+ "46 SRR7609163 ocean \n",
+ "47 SRR7608010 ocean \n",
+ "48 SRR7607757 ocean \n",
+ "49 SRR8361102 ocean \n",
+ "50 SRR7593830 ocean \n",
+ "51 SRR7594085 ocean \n",
+ "52 SRR7592236 ocean \n",
+ "53 SRR8361526 ocean \n",
+ "54 SRR7592212 ocean \n",
+ "55 SRR7600454 ocean \n",
+ "56 SRR8360547 ocean \n",
+ "57 SRR7608832 ocean \n",
+ "58 SRR8361371 ocean \n",
+ "59 SRR8360543 ocean \n",
+ "60 SRR7592321 ocean \n",
+ "61 SRR8361527 ocean \n",
+ "62 SRR7600455 ocean \n",
+ "63 SRR8361350 ocean \n",
+ "64 SRR8361349 ocean \n",
+ "65 SRR7609452 ocean \n",
+ "66 SRR7609166 ocean \n",
+ "67 SRR7609363 ocean \n",
+ "68 SRR8361101 ocean \n",
+ "69 SRR7608017 ocean \n",
+ "70 SRR7593934 ocean \n",
+ "71 SRR7609361 ocean \n",
+ "72 SRR8361516 ocean \n",
+ "73 SRR7609462 ocean \n",
+ "74 SRR7609344 ocean "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "moran010B_mtx_vs_mgx <- moran010B %>%\n",
+ " left_join(moran010B_run_accessions, by = c(\"mtx_study_accession\" = \"study_accession\")) %>%\n",
+ " select(sample_name, mtx_study_accession, mtx_run_accession = run_accession, mgx_study_accession) %>%\n",
+ " left_join(moran010B_run_accessions, by = c(\"mgx_study_accession\" = \"study_accession\")) %>%\n",
+ " select(sample_name, mtx_study_accession, mtx_run_accession, mgx_study_accession, mgx_run_accession = run_accession) %>%\n",
+ " mutate(sample_type = \"ocean\")\n",
+ "\n",
+ "moran010B_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2261971e",
+ "metadata": {},
+ "source": [
+ "## PRJNA603240"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "1c8d6131",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna603240 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA603240&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ " show_col_types = F)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "155a92e3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 10 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tsample_106 | PRJNA603240 | SRR10968230 | PRJNA603240 | SRR10968263 | deadwood |
\n",
+ "\tsample_069 | PRJNA603240 | SRR10968231 | PRJNA603240 | SRR10968264 | deadwood |
\n",
+ "\tsample_055 | PRJNA603240 | SRR10968232 | PRJNA603240 | SRR10968265 | deadwood |
\n",
+ "\tsample_049 | PRJNA603240 | SRR10968233 | PRJNA603240 | SRR10968266 | deadwood |
\n",
+ "\tsample_031 | PRJNA603240 | SRR10968234 | PRJNA603240 | SRR10968267 | deadwood |
\n",
+ "\tsample_116 | PRJNA603240 | SRR10968236 | PRJNA603240 | SRR10968225 | deadwood |
\n",
+ "\tsample_110 | PRJNA603240 | SRR10968237 | PRJNA603240 | SRR10968226 | deadwood |
\n",
+ "\tsample_044 | PRJNA603240 | SRR10968238 | PRJNA603240 | SRR10968227 | deadwood |
\n",
+ "\tsample_007 | PRJNA603240 | SRR10968239 | PRJNA603240 | SRR10968228 | deadwood |
\n",
+ "\tsample_006 | PRJNA603240 | SRR10968240 | PRJNA603240 | SRR10968229 | deadwood |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 10 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t sample\\_106 & PRJNA603240 & SRR10968230 & PRJNA603240 & SRR10968263 & deadwood\\\\\n",
+ "\t sample\\_069 & PRJNA603240 & SRR10968231 & PRJNA603240 & SRR10968264 & deadwood\\\\\n",
+ "\t sample\\_055 & PRJNA603240 & SRR10968232 & PRJNA603240 & SRR10968265 & deadwood\\\\\n",
+ "\t sample\\_049 & PRJNA603240 & SRR10968233 & PRJNA603240 & SRR10968266 & deadwood\\\\\n",
+ "\t sample\\_031 & PRJNA603240 & SRR10968234 & PRJNA603240 & SRR10968267 & deadwood\\\\\n",
+ "\t sample\\_116 & PRJNA603240 & SRR10968236 & PRJNA603240 & SRR10968225 & deadwood\\\\\n",
+ "\t sample\\_110 & PRJNA603240 & SRR10968237 & PRJNA603240 & SRR10968226 & deadwood\\\\\n",
+ "\t sample\\_044 & PRJNA603240 & SRR10968238 & PRJNA603240 & SRR10968227 & deadwood\\\\\n",
+ "\t sample\\_007 & PRJNA603240 & SRR10968239 & PRJNA603240 & SRR10968228 & deadwood\\\\\n",
+ "\t sample\\_006 & PRJNA603240 & SRR10968240 & PRJNA603240 & SRR10968229 & deadwood\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 10 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| sample_106 | PRJNA603240 | SRR10968230 | PRJNA603240 | SRR10968263 | deadwood |\n",
+ "| sample_069 | PRJNA603240 | SRR10968231 | PRJNA603240 | SRR10968264 | deadwood |\n",
+ "| sample_055 | PRJNA603240 | SRR10968232 | PRJNA603240 | SRR10968265 | deadwood |\n",
+ "| sample_049 | PRJNA603240 | SRR10968233 | PRJNA603240 | SRR10968266 | deadwood |\n",
+ "| sample_031 | PRJNA603240 | SRR10968234 | PRJNA603240 | SRR10968267 | deadwood |\n",
+ "| sample_116 | PRJNA603240 | SRR10968236 | PRJNA603240 | SRR10968225 | deadwood |\n",
+ "| sample_110 | PRJNA603240 | SRR10968237 | PRJNA603240 | SRR10968226 | deadwood |\n",
+ "| sample_044 | PRJNA603240 | SRR10968238 | PRJNA603240 | SRR10968227 | deadwood |\n",
+ "| sample_007 | PRJNA603240 | SRR10968239 | PRJNA603240 | SRR10968228 | deadwood |\n",
+ "| sample_006 | PRJNA603240 | SRR10968240 | PRJNA603240 | SRR10968229 | deadwood |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 sample_106 PRJNA603240 SRR10968230 PRJNA603240 \n",
+ "2 sample_069 PRJNA603240 SRR10968231 PRJNA603240 \n",
+ "3 sample_055 PRJNA603240 SRR10968232 PRJNA603240 \n",
+ "4 sample_049 PRJNA603240 SRR10968233 PRJNA603240 \n",
+ "5 sample_031 PRJNA603240 SRR10968234 PRJNA603240 \n",
+ "6 sample_116 PRJNA603240 SRR10968236 PRJNA603240 \n",
+ "7 sample_110 PRJNA603240 SRR10968237 PRJNA603240 \n",
+ "8 sample_044 PRJNA603240 SRR10968238 PRJNA603240 \n",
+ "9 sample_007 PRJNA603240 SRR10968239 PRJNA603240 \n",
+ "10 sample_006 PRJNA603240 SRR10968240 PRJNA603240 \n",
+ " mgx_run_accession sample_type\n",
+ "1 SRR10968263 deadwood \n",
+ "2 SRR10968264 deadwood \n",
+ "3 SRR10968265 deadwood \n",
+ "4 SRR10968266 deadwood \n",
+ "5 SRR10968267 deadwood \n",
+ "6 SRR10968225 deadwood \n",
+ "7 SRR10968226 deadwood \n",
+ "8 SRR10968227 deadwood \n",
+ "9 SRR10968228 deadwood \n",
+ "10 SRR10968229 deadwood "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "prjna603240_mtx <- prjna603240 %>% \n",
+ " filter(library_source == \"METATRANSCRIPTOMIC\") %>%\n",
+ " filter(library_selection == \"RANDOM PCR\") %>%\n",
+ " select(sample_name = sample_alias,\n",
+ " mtx_study_accession = study_accession,\n",
+ " mtx_run_accession = run_accession)\n",
+ "\n",
+ "prjna603240_mgx <- prjna603240 %>% \n",
+ " filter(library_source == \"METAGENOMIC\") %>%\n",
+ " select(sample_name = sample_alias,\n",
+ " mgx_study_accession = study_accession,\n",
+ " mgx_run_accession = run_accession)\n",
+ "\n",
+ "prjna603240_mtx_vs_mgx <- left_join(prjna603240_mtx, prjna603240_mgx, by = \"sample_name\") %>%\n",
+ " mutate(sample_type = \"deadwood\")\n",
+ "\n",
+ "prjna603240_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4bf62859",
+ "metadata": {},
+ "source": [
+ "## PRJNA202380"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "10459508",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna202380 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA202380&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ " show_col_types = F)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "de539bd6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna202380 <- prjna202380 %>%\n",
+ " filter(instrument_platform == \"ILLUMINA\") %>%\n",
+ " mutate(sample_name = gsub(\"Illumina HiSeq 2000 paired end sequencing; \", \"\", experiment_title),\n",
+ " sample_name = gsub(\" metatranscri[op]tomic sample\", \"\", sample_name),\n",
+ " sample_name = gsub(\" metagenomic sample\", \"\", sample_name),\n",
+ " sample_name = gsub(\"Illumina HiSeq 2000 sequencing; \", \"\", sample_name)) %>%\n",
+ " select(study_accession, run_accession,library_source, sample_name)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "4b7fc613",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 20 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tTag1363-1 | PRJNA202380 | SRR1138694 | PRJNA202380 | SRR1267595 | sheep_rumen |
\n",
+ "\tTag1111-1 | PRJNA202380 | SRR1138697 | PRJNA202380 | SRR1222429 | sheep_rumen |
\n",
+ "\tTag1111-2 | PRJNA202380 | SRR1138702 | PRJNA202380 | SRR1222431 | sheep_rumen |
\n",
+ "\tTag1234-1 | PRJNA202380 | SRR1206249 | PRJNA202380 | SRR1206671 | sheep_rumen |
\n",
+ "\tTag1494-1 | PRJNA202380 | SRR873450 | PRJNA202380 | SRR873595 | sheep_rumen |
\n",
+ "\tTag1283-1 | PRJNA202380 | SRR873451 | PRJNA202380 | SRR873596 | sheep_rumen |
\n",
+ "\tTag1435-1 | PRJNA202380 | SRR873452 | PRJNA202380 | SRR873597 | sheep_rumen |
\n",
+ "\tTag1494-2 | PRJNA202380 | SRR873453 | PRJNA202380 | SRR873598 | sheep_rumen |
\n",
+ "\tTag1265-1 | PRJNA202380 | SRR873454 | PRJNA202380 | SRR873599 | sheep_rumen |
\n",
+ "\tTag1435-2 | PRJNA202380 | SRR873455 | PRJNA202380 | SRR873600 | sheep_rumen |
\n",
+ "\tTag1283-2 | PRJNA202380 | SRR873456 | PRJNA202380 | SRR873601 | sheep_rumen |
\n",
+ "\tTag1265-2 | PRJNA202380 | SRR873457 | PRJNA202380 | SRR873602 | sheep_rumen |
\n",
+ "\tTag1363-2 | PRJNA202380 | SRR873458 | PRJNA202380 | SRR873603 | sheep_rumen |
\n",
+ "\tTag1174-1 | PRJNA202380 | SRR873459 | PRJNA202380 | SRR873604 | sheep_rumen |
\n",
+ "\tTag1234-2 | PRJNA202380 | SRR873460 | PRJNA202380 | SRR873605 | sheep_rumen |
\n",
+ "\tTag1586-1 | PRJNA202380 | SRR873461 | PRJNA202380 | SRR873606 | sheep_rumen |
\n",
+ "\tTag1586-2 | PRJNA202380 | SRR873462 | PRJNA202380 | SRR873607 | sheep_rumen |
\n",
+ "\tTag1333-1 | PRJNA202380 | SRR873463 | PRJNA202380 | SRR873608 | sheep_rumen |
\n",
+ "\tTag1174-2 | PRJNA202380 | SRR873464 | PRJNA202380 | SRR873609 | sheep_rumen |
\n",
+ "\tTag1333-2 | PRJNA202380 | SRR873465 | PRJNA202380 | SRR873610 | sheep_rumen |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 20 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t Tag1363-1 & PRJNA202380 & SRR1138694 & PRJNA202380 & SRR1267595 & sheep\\_rumen\\\\\n",
+ "\t Tag1111-1 & PRJNA202380 & SRR1138697 & PRJNA202380 & SRR1222429 & sheep\\_rumen\\\\\n",
+ "\t Tag1111-2 & PRJNA202380 & SRR1138702 & PRJNA202380 & SRR1222431 & sheep\\_rumen\\\\\n",
+ "\t Tag1234-1 & PRJNA202380 & SRR1206249 & PRJNA202380 & SRR1206671 & sheep\\_rumen\\\\\n",
+ "\t Tag1494-1 & PRJNA202380 & SRR873450 & PRJNA202380 & SRR873595 & sheep\\_rumen\\\\\n",
+ "\t Tag1283-1 & PRJNA202380 & SRR873451 & PRJNA202380 & SRR873596 & sheep\\_rumen\\\\\n",
+ "\t Tag1435-1 & PRJNA202380 & SRR873452 & PRJNA202380 & SRR873597 & sheep\\_rumen\\\\\n",
+ "\t Tag1494-2 & PRJNA202380 & SRR873453 & PRJNA202380 & SRR873598 & sheep\\_rumen\\\\\n",
+ "\t Tag1265-1 & PRJNA202380 & SRR873454 & PRJNA202380 & SRR873599 & sheep\\_rumen\\\\\n",
+ "\t Tag1435-2 & PRJNA202380 & SRR873455 & PRJNA202380 & SRR873600 & sheep\\_rumen\\\\\n",
+ "\t Tag1283-2 & PRJNA202380 & SRR873456 & PRJNA202380 & SRR873601 & sheep\\_rumen\\\\\n",
+ "\t Tag1265-2 & PRJNA202380 & SRR873457 & PRJNA202380 & SRR873602 & sheep\\_rumen\\\\\n",
+ "\t Tag1363-2 & PRJNA202380 & SRR873458 & PRJNA202380 & SRR873603 & sheep\\_rumen\\\\\n",
+ "\t Tag1174-1 & PRJNA202380 & SRR873459 & PRJNA202380 & SRR873604 & sheep\\_rumen\\\\\n",
+ "\t Tag1234-2 & PRJNA202380 & SRR873460 & PRJNA202380 & SRR873605 & sheep\\_rumen\\\\\n",
+ "\t Tag1586-1 & PRJNA202380 & SRR873461 & PRJNA202380 & SRR873606 & sheep\\_rumen\\\\\n",
+ "\t Tag1586-2 & PRJNA202380 & SRR873462 & PRJNA202380 & SRR873607 & sheep\\_rumen\\\\\n",
+ "\t Tag1333-1 & PRJNA202380 & SRR873463 & PRJNA202380 & SRR873608 & sheep\\_rumen\\\\\n",
+ "\t Tag1174-2 & PRJNA202380 & SRR873464 & PRJNA202380 & SRR873609 & sheep\\_rumen\\\\\n",
+ "\t Tag1333-2 & PRJNA202380 & SRR873465 & PRJNA202380 & SRR873610 & sheep\\_rumen\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 20 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| Tag1363-1 | PRJNA202380 | SRR1138694 | PRJNA202380 | SRR1267595 | sheep_rumen |\n",
+ "| Tag1111-1 | PRJNA202380 | SRR1138697 | PRJNA202380 | SRR1222429 | sheep_rumen |\n",
+ "| Tag1111-2 | PRJNA202380 | SRR1138702 | PRJNA202380 | SRR1222431 | sheep_rumen |\n",
+ "| Tag1234-1 | PRJNA202380 | SRR1206249 | PRJNA202380 | SRR1206671 | sheep_rumen |\n",
+ "| Tag1494-1 | PRJNA202380 | SRR873450 | PRJNA202380 | SRR873595 | sheep_rumen |\n",
+ "| Tag1283-1 | PRJNA202380 | SRR873451 | PRJNA202380 | SRR873596 | sheep_rumen |\n",
+ "| Tag1435-1 | PRJNA202380 | SRR873452 | PRJNA202380 | SRR873597 | sheep_rumen |\n",
+ "| Tag1494-2 | PRJNA202380 | SRR873453 | PRJNA202380 | SRR873598 | sheep_rumen |\n",
+ "| Tag1265-1 | PRJNA202380 | SRR873454 | PRJNA202380 | SRR873599 | sheep_rumen |\n",
+ "| Tag1435-2 | PRJNA202380 | SRR873455 | PRJNA202380 | SRR873600 | sheep_rumen |\n",
+ "| Tag1283-2 | PRJNA202380 | SRR873456 | PRJNA202380 | SRR873601 | sheep_rumen |\n",
+ "| Tag1265-2 | PRJNA202380 | SRR873457 | PRJNA202380 | SRR873602 | sheep_rumen |\n",
+ "| Tag1363-2 | PRJNA202380 | SRR873458 | PRJNA202380 | SRR873603 | sheep_rumen |\n",
+ "| Tag1174-1 | PRJNA202380 | SRR873459 | PRJNA202380 | SRR873604 | sheep_rumen |\n",
+ "| Tag1234-2 | PRJNA202380 | SRR873460 | PRJNA202380 | SRR873605 | sheep_rumen |\n",
+ "| Tag1586-1 | PRJNA202380 | SRR873461 | PRJNA202380 | SRR873606 | sheep_rumen |\n",
+ "| Tag1586-2 | PRJNA202380 | SRR873462 | PRJNA202380 | SRR873607 | sheep_rumen |\n",
+ "| Tag1333-1 | PRJNA202380 | SRR873463 | PRJNA202380 | SRR873608 | sheep_rumen |\n",
+ "| Tag1174-2 | PRJNA202380 | SRR873464 | PRJNA202380 | SRR873609 | sheep_rumen |\n",
+ "| Tag1333-2 | PRJNA202380 | SRR873465 | PRJNA202380 | SRR873610 | sheep_rumen |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 Tag1363-1 PRJNA202380 SRR1138694 PRJNA202380 \n",
+ "2 Tag1111-1 PRJNA202380 SRR1138697 PRJNA202380 \n",
+ "3 Tag1111-2 PRJNA202380 SRR1138702 PRJNA202380 \n",
+ "4 Tag1234-1 PRJNA202380 SRR1206249 PRJNA202380 \n",
+ "5 Tag1494-1 PRJNA202380 SRR873450 PRJNA202380 \n",
+ "6 Tag1283-1 PRJNA202380 SRR873451 PRJNA202380 \n",
+ "7 Tag1435-1 PRJNA202380 SRR873452 PRJNA202380 \n",
+ "8 Tag1494-2 PRJNA202380 SRR873453 PRJNA202380 \n",
+ "9 Tag1265-1 PRJNA202380 SRR873454 PRJNA202380 \n",
+ "10 Tag1435-2 PRJNA202380 SRR873455 PRJNA202380 \n",
+ "11 Tag1283-2 PRJNA202380 SRR873456 PRJNA202380 \n",
+ "12 Tag1265-2 PRJNA202380 SRR873457 PRJNA202380 \n",
+ "13 Tag1363-2 PRJNA202380 SRR873458 PRJNA202380 \n",
+ "14 Tag1174-1 PRJNA202380 SRR873459 PRJNA202380 \n",
+ "15 Tag1234-2 PRJNA202380 SRR873460 PRJNA202380 \n",
+ "16 Tag1586-1 PRJNA202380 SRR873461 PRJNA202380 \n",
+ "17 Tag1586-2 PRJNA202380 SRR873462 PRJNA202380 \n",
+ "18 Tag1333-1 PRJNA202380 SRR873463 PRJNA202380 \n",
+ "19 Tag1174-2 PRJNA202380 SRR873464 PRJNA202380 \n",
+ "20 Tag1333-2 PRJNA202380 SRR873465 PRJNA202380 \n",
+ " mgx_run_accession sample_type\n",
+ "1 SRR1267595 sheep_rumen\n",
+ "2 SRR1222429 sheep_rumen\n",
+ "3 SRR1222431 sheep_rumen\n",
+ "4 SRR1206671 sheep_rumen\n",
+ "5 SRR873595 sheep_rumen\n",
+ "6 SRR873596 sheep_rumen\n",
+ "7 SRR873597 sheep_rumen\n",
+ "8 SRR873598 sheep_rumen\n",
+ "9 SRR873599 sheep_rumen\n",
+ "10 SRR873600 sheep_rumen\n",
+ "11 SRR873601 sheep_rumen\n",
+ "12 SRR873602 sheep_rumen\n",
+ "13 SRR873603 sheep_rumen\n",
+ "14 SRR873604 sheep_rumen\n",
+ "15 SRR873605 sheep_rumen\n",
+ "16 SRR873606 sheep_rumen\n",
+ "17 SRR873607 sheep_rumen\n",
+ "18 SRR873608 sheep_rumen\n",
+ "19 SRR873609 sheep_rumen\n",
+ "20 SRR873610 sheep_rumen"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "prjna202380_mtx <- prjna202380 %>% \n",
+ " filter(library_source == \"METATRANSCRIPTOMIC\") %>%\n",
+ " select(sample_name,\n",
+ " mtx_study_accession = study_accession,\n",
+ " mtx_run_accession = run_accession)\n",
+ "\n",
+ "prjna202380_mgx <- prjna202380 %>% \n",
+ " filter(library_source == \"METAGENOMIC\") %>%\n",
+ " select(sample_name,\n",
+ " mgx_study_accession = study_accession,\n",
+ " mgx_run_accession = run_accession)\n",
+ "\n",
+ "prjna202380_mtx_vs_mgx <- left_join(prjna202380_mtx, prjna202380_mgx, by = \"sample_name\") %>%\n",
+ " mutate(sample_type = \"sheep_rumen\")\n",
+ "\n",
+ "prjna202380_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cde73933",
+ "metadata": {},
+ "source": [
+ "## PRJNA541981"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "1a764bbb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna541981 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA541981&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ " show_col_types = F)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "378b1ac3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna541981_mtx <- prjna541981 %>% \n",
+ " filter(library_source == \"METATRANSCRIPTOMIC\") %>%\n",
+ " select(sample_name = sample_alias,\n",
+ " mtx_study_accession = study_accession,\n",
+ " mtx_run_accession = run_accession)\n",
+ "\n",
+ "prjna541981_mgx <- prjna541981 %>% \n",
+ " filter(library_source == \"METAGENOMIC\") %>%\n",
+ " filter(library_strategy == \"WGS\") %>%\n",
+ " select(sample_name = sample_alias,\n",
+ " mgx_study_accession = study_accession,\n",
+ " mgx_run_accession = run_accession)\n",
+ "\n",
+ "prjna541981_mtx_vs_mgx <- left_join(prjna541981_mtx, prjna541981_mgx, by = \"sample_name\") %>%\n",
+ " mutate(sample_type = \"human_skin\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ae3f271d",
+ "metadata": {},
+ "source": [
+ "## PRJNA797778"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "a1b07276",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna797778 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA797778&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ " show_col_types = F)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "d479bfe9",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 180 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tUAB088_W2D1 | PRJNA797778 | SRR17858151 | PRJNA797778 | SRR17635672 | human_vagina |
\n",
+ "\tUAB082_W10D2 | PRJNA797778 | SRR17858152 | PRJNA797778 | SRR17635674 | human_vagina |
\n",
+ "\tUAB082_W5D7 | PRJNA797778 | SRR17858153 | PRJNA797778 | SRR17635676 | human_vagina |
\n",
+ "\tUAB082_W3D7 | PRJNA797778 | SRR17858154 | PRJNA797778 | SRR17635677 | human_vagina |
\n",
+ "\tUAB082_W2D5 | PRJNA797778 | SRR17858155 | PRJNA797778 | SRR17635678 | human_vagina |
\n",
+ "\tUAB079_W10D1 | PRJNA797778 | SRR17858156 | PRJNA797778 | SRR17635679 | human_vagina |
\n",
+ "\tUAB079_W7D6 | PRJNA797778 | SRR17858157 | PRJNA797778 | SRR17635680 | human_vagina |
\n",
+ "\tUAB079_W6D7 | PRJNA797778 | SRR17858158 | PRJNA797778 | SRR17635681 | human_vagina |
\n",
+ "\tUAB079_W3D7 | PRJNA797778 | SRR17858159 | PRJNA797778 | SRR17635682 | human_vagina |
\n",
+ "\tUAB079_W2D2 | PRJNA797778 | SRR17858160 | PRJNA797778 | SRR17635683 | human_vagina |
\n",
+ "\tUAB006_W2D2 | PRJNA797778 | SRR17858161 | PRJNA797778 | SRR17635801 | human_vagina |
\n",
+ "\tUAB077_W9D7 | PRJNA797778 | SRR17858162 | PRJNA797778 | SRR17635685 | human_vagina |
\n",
+ "\tUAB077_W8D2 | PRJNA797778 | SRR17858163 | PRJNA797778 | SRR17635686 | human_vagina |
\n",
+ "\tUAB077_W5D7 | PRJNA797778 | SRR17858164 | PRJNA797778 | SRR17635687 | human_vagina |
\n",
+ "\tUAB077_W3D1 | PRJNA797778 | SRR17858165 | PRJNA797778 | SRR17635688 | human_vagina |
\n",
+ "\tUAB077_W1D7 | PRJNA797778 | SRR17858166 | PRJNA797778 | SRR17635689 | human_vagina |
\n",
+ "\tUAB071_W10D2 | PRJNA797778 | SRR17858167 | PRJNA797778 | SRR17635690 | human_vagina |
\n",
+ "\tUAB071_W7D7 | PRJNA797778 | SRR17858168 | PRJNA797778 | SRR17635691 | human_vagina |
\n",
+ "\tUAB071_W5D7 | PRJNA797778 | SRR17858169 | PRJNA797778 | SRR17635692 | human_vagina |
\n",
+ "\tUAB071_W3D7 | PRJNA797778 | SRR17858170 | PRJNA797778 | SRR17635693 | human_vagina |
\n",
+ "\tUAB071_W1D7 | PRJNA797778 | SRR17858171 | PRJNA797778 | SRR17635694 | human_vagina |
\n",
+ "\tUAB002_W5D7 | PRJNA797778 | SRR17858172 | PRJNA797778 | SRR17635804 | human_vagina |
\n",
+ "\tAYAC02_W9D3 | PRJNA797778 | SRR17858173 | PRJNA797778 | SRR17635818 | human_vagina |
\n",
+ "\tUAB060_W10D2 | PRJNA797778 | SRR17858174 | PRJNA797778 | SRR17635696 | human_vagina |
\n",
+ "\tUAB060_W7D7 | PRJNA797778 | SRR17858175 | PRJNA797778 | SRR17635697 | human_vagina |
\n",
+ "\tUAB060_W5D7 | PRJNA797778 | SRR17858176 | PRJNA797778 | SRR17635698 | human_vagina |
\n",
+ "\tUAB060_W3D7 | PRJNA797778 | SRR17858177 | PRJNA797778 | SRR17635763 | human_vagina |
\n",
+ "\tUAB060_W1D6 | PRJNA797778 | SRR17858178 | PRJNA797778 | SRR17635764 | human_vagina |
\n",
+ "\tUAB052_W10D2 | PRJNA797778 | SRR17858179 | PRJNA797778 | SRR17635765 | human_vagina |
\n",
+ "\tUAB052_W8D3 | PRJNA797778 | SRR17858180 | PRJNA797778 | SRR17635766 | human_vagina |
\n",
+ "\t⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
\n",
+ "\tUAB116_W1D7 | PRJNA797778 | SRR17858301 | PRJNA797778 | SRR17635741 | human_vagina |
\n",
+ "\tUAB110_W10D4 | PRJNA797778 | SRR17858302 | PRJNA797778 | SRR17635742 | human_vagina |
\n",
+ "\tUAB110_W7D7 | PRJNA797778 | SRR17858303 | PRJNA797778 | SRR17635743 | human_vagina |
\n",
+ "\tUAB110_W6D2 | PRJNA797778 | SRR17858304 | PRJNA797778 | SRR17635744 | human_vagina |
\n",
+ "\tUAB110_W3D7 | PRJNA797778 | SRR17858305 | PRJNA797778 | SRR17635745 | human_vagina |
\n",
+ "\tUAB010_W5D6 | PRJNA797778 | SRR17858306 | PRJNA797778 | SRR17635490 | human_vagina |
\n",
+ "\tUAB110_W1D7 | PRJNA797778 | SRR17858307 | PRJNA797778 | SRR17635746 | human_vagina |
\n",
+ "\tUAB106_W9D7 | PRJNA797778 | SRR17858308 | PRJNA797778 | SRR17635748 | human_vagina |
\n",
+ "\tUAB106_W7D7 | PRJNA797778 | SRR17858309 | PRJNA797778 | SRR17635749 | human_vagina |
\n",
+ "\tUAB106_W5D4 | PRJNA797778 | SRR17858310 | PRJNA797778 | SRR17635750 | human_vagina |
\n",
+ "\tUAB106_W3D7 | PRJNA797778 | SRR17858311 | PRJNA797778 | SRR17635751 | human_vagina |
\n",
+ "\tUAB106_W1D7 | PRJNA797778 | SRR17858312 | PRJNA797778 | SRR17635752 | human_vagina |
\n",
+ "\tUAB096_W9D7 | PRJNA797778 | SRR17858313 | PRJNA797778 | SRR17635753 | human_vagina |
\n",
+ "\tUAB096_W7D3 | PRJNA797778 | SRR17858314 | PRJNA797778 | SRR17635754 | human_vagina |
\n",
+ "\tUAB096_W5D3 | PRJNA797778 | SRR17858315 | PRJNA797778 | SRR17635755 | human_vagina |
\n",
+ "\tUAB096_W3D3 | PRJNA797778 | SRR17858316 | PRJNA797778 | SRR17635756 | human_vagina |
\n",
+ "\tUAB010_W3D1 | PRJNA797778 | SRR17858317 | PRJNA797778 | SRR17635491 | human_vagina |
\n",
+ "\tUAB096_W1D5 | PRJNA797778 | SRR17858318 | PRJNA797778 | SRR17635757 | human_vagina |
\n",
+ "\tUAB093_W9D6 | PRJNA797778 | SRR17858319 | PRJNA797778 | SRR17635759 | human_vagina |
\n",
+ "\tUAB093_W7D7 | PRJNA797778 | SRR17858320 | PRJNA797778 | SRR17635760 | human_vagina |
\n",
+ "\tUAB093_W5D7 | PRJNA797778 | SRR17858321 | PRJNA797778 | SRR17635761 | human_vagina |
\n",
+ "\tUAB093_W4D2 | PRJNA797778 | SRR17858322 | PRJNA797778 | SRR17635762 | human_vagina |
\n",
+ "\tUAB093_W1D7 | PRJNA797778 | SRR17858323 | PRJNA797778 | SRR17635667 | human_vagina |
\n",
+ "\tUAB088_W10D1 | PRJNA797778 | SRR17858324 | PRJNA797778 | SRR17635668 | human_vagina |
\n",
+ "\tUAB088_W7D6 | PRJNA797778 | SRR17858325 | PRJNA797778 | SRR17635669 | human_vagina |
\n",
+ "\tUAB088_W6D1 | PRJNA797778 | SRR17858326 | PRJNA797778 | SRR17635670 | human_vagina |
\n",
+ "\tUAB088_W4D1 | PRJNA797778 | SRR17858327 | PRJNA797778 | SRR17635671 | human_vagina |
\n",
+ "\tUAB007_W1D7 | PRJNA797778 | SRR17858328 | PRJNA797778 | SRR17635795 | human_vagina |
\n",
+ "\tAYAC02_W1D7 | PRJNA797778 | SRR17858329 | PRJNA797778 | SRR17635794 | human_vagina |
\n",
+ "\tEM04_W3D7 | PRJNA797778 | SRR17858330 | PRJNA797778 | SRR17635711 | human_vagina |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 180 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t UAB088\\_W2D1 & PRJNA797778 & SRR17858151 & PRJNA797778 & SRR17635672 & human\\_vagina\\\\\n",
+ "\t UAB082\\_W10D2 & PRJNA797778 & SRR17858152 & PRJNA797778 & SRR17635674 & human\\_vagina\\\\\n",
+ "\t UAB082\\_W5D7 & PRJNA797778 & SRR17858153 & PRJNA797778 & SRR17635676 & human\\_vagina\\\\\n",
+ "\t UAB082\\_W3D7 & PRJNA797778 & SRR17858154 & PRJNA797778 & SRR17635677 & human\\_vagina\\\\\n",
+ "\t UAB082\\_W2D5 & PRJNA797778 & SRR17858155 & PRJNA797778 & SRR17635678 & human\\_vagina\\\\\n",
+ "\t UAB079\\_W10D1 & PRJNA797778 & SRR17858156 & PRJNA797778 & SRR17635679 & human\\_vagina\\\\\n",
+ "\t UAB079\\_W7D6 & PRJNA797778 & SRR17858157 & PRJNA797778 & SRR17635680 & human\\_vagina\\\\\n",
+ "\t UAB079\\_W6D7 & PRJNA797778 & SRR17858158 & PRJNA797778 & SRR17635681 & human\\_vagina\\\\\n",
+ "\t UAB079\\_W3D7 & PRJNA797778 & SRR17858159 & PRJNA797778 & SRR17635682 & human\\_vagina\\\\\n",
+ "\t UAB079\\_W2D2 & PRJNA797778 & SRR17858160 & PRJNA797778 & SRR17635683 & human\\_vagina\\\\\n",
+ "\t UAB006\\_W2D2 & PRJNA797778 & SRR17858161 & PRJNA797778 & SRR17635801 & human\\_vagina\\\\\n",
+ "\t UAB077\\_W9D7 & PRJNA797778 & SRR17858162 & PRJNA797778 & SRR17635685 & human\\_vagina\\\\\n",
+ "\t UAB077\\_W8D2 & PRJNA797778 & SRR17858163 & PRJNA797778 & SRR17635686 & human\\_vagina\\\\\n",
+ "\t UAB077\\_W5D7 & PRJNA797778 & SRR17858164 & PRJNA797778 & SRR17635687 & human\\_vagina\\\\\n",
+ "\t UAB077\\_W3D1 & PRJNA797778 & SRR17858165 & PRJNA797778 & SRR17635688 & human\\_vagina\\\\\n",
+ "\t UAB077\\_W1D7 & PRJNA797778 & SRR17858166 & PRJNA797778 & SRR17635689 & human\\_vagina\\\\\n",
+ "\t UAB071\\_W10D2 & PRJNA797778 & SRR17858167 & PRJNA797778 & SRR17635690 & human\\_vagina\\\\\n",
+ "\t UAB071\\_W7D7 & PRJNA797778 & SRR17858168 & PRJNA797778 & SRR17635691 & human\\_vagina\\\\\n",
+ "\t UAB071\\_W5D7 & PRJNA797778 & SRR17858169 & PRJNA797778 & SRR17635692 & human\\_vagina\\\\\n",
+ "\t UAB071\\_W3D7 & PRJNA797778 & SRR17858170 & PRJNA797778 & SRR17635693 & human\\_vagina\\\\\n",
+ "\t UAB071\\_W1D7 & PRJNA797778 & SRR17858171 & PRJNA797778 & SRR17635694 & human\\_vagina\\\\\n",
+ "\t UAB002\\_W5D7 & PRJNA797778 & SRR17858172 & PRJNA797778 & SRR17635804 & human\\_vagina\\\\\n",
+ "\t AYAC02\\_W9D3 & PRJNA797778 & SRR17858173 & PRJNA797778 & SRR17635818 & human\\_vagina\\\\\n",
+ "\t UAB060\\_W10D2 & PRJNA797778 & SRR17858174 & PRJNA797778 & SRR17635696 & human\\_vagina\\\\\n",
+ "\t UAB060\\_W7D7 & PRJNA797778 & SRR17858175 & PRJNA797778 & SRR17635697 & human\\_vagina\\\\\n",
+ "\t UAB060\\_W5D7 & PRJNA797778 & SRR17858176 & PRJNA797778 & SRR17635698 & human\\_vagina\\\\\n",
+ "\t UAB060\\_W3D7 & PRJNA797778 & SRR17858177 & PRJNA797778 & SRR17635763 & human\\_vagina\\\\\n",
+ "\t UAB060\\_W1D6 & PRJNA797778 & SRR17858178 & PRJNA797778 & SRR17635764 & human\\_vagina\\\\\n",
+ "\t UAB052\\_W10D2 & PRJNA797778 & SRR17858179 & PRJNA797778 & SRR17635765 & human\\_vagina\\\\\n",
+ "\t UAB052\\_W8D3 & PRJNA797778 & SRR17858180 & PRJNA797778 & SRR17635766 & human\\_vagina\\\\\n",
+ "\t ⋮ & ⋮ & ⋮ & ⋮ & ⋮ & ⋮\\\\\n",
+ "\t UAB116\\_W1D7 & PRJNA797778 & SRR17858301 & PRJNA797778 & SRR17635741 & human\\_vagina\\\\\n",
+ "\t UAB110\\_W10D4 & PRJNA797778 & SRR17858302 & PRJNA797778 & SRR17635742 & human\\_vagina\\\\\n",
+ "\t UAB110\\_W7D7 & PRJNA797778 & SRR17858303 & PRJNA797778 & SRR17635743 & human\\_vagina\\\\\n",
+ "\t UAB110\\_W6D2 & PRJNA797778 & SRR17858304 & PRJNA797778 & SRR17635744 & human\\_vagina\\\\\n",
+ "\t UAB110\\_W3D7 & PRJNA797778 & SRR17858305 & PRJNA797778 & SRR17635745 & human\\_vagina\\\\\n",
+ "\t UAB010\\_W5D6 & PRJNA797778 & SRR17858306 & PRJNA797778 & SRR17635490 & human\\_vagina\\\\\n",
+ "\t UAB110\\_W1D7 & PRJNA797778 & SRR17858307 & PRJNA797778 & SRR17635746 & human\\_vagina\\\\\n",
+ "\t UAB106\\_W9D7 & PRJNA797778 & SRR17858308 & PRJNA797778 & SRR17635748 & human\\_vagina\\\\\n",
+ "\t UAB106\\_W7D7 & PRJNA797778 & SRR17858309 & PRJNA797778 & SRR17635749 & human\\_vagina\\\\\n",
+ "\t UAB106\\_W5D4 & PRJNA797778 & SRR17858310 & PRJNA797778 & SRR17635750 & human\\_vagina\\\\\n",
+ "\t UAB106\\_W3D7 & PRJNA797778 & SRR17858311 & PRJNA797778 & SRR17635751 & human\\_vagina\\\\\n",
+ "\t UAB106\\_W1D7 & PRJNA797778 & SRR17858312 & PRJNA797778 & SRR17635752 & human\\_vagina\\\\\n",
+ "\t UAB096\\_W9D7 & PRJNA797778 & SRR17858313 & PRJNA797778 & SRR17635753 & human\\_vagina\\\\\n",
+ "\t UAB096\\_W7D3 & PRJNA797778 & SRR17858314 & PRJNA797778 & SRR17635754 & human\\_vagina\\\\\n",
+ "\t UAB096\\_W5D3 & PRJNA797778 & SRR17858315 & PRJNA797778 & SRR17635755 & human\\_vagina\\\\\n",
+ "\t UAB096\\_W3D3 & PRJNA797778 & SRR17858316 & PRJNA797778 & SRR17635756 & human\\_vagina\\\\\n",
+ "\t UAB010\\_W3D1 & PRJNA797778 & SRR17858317 & PRJNA797778 & SRR17635491 & human\\_vagina\\\\\n",
+ "\t UAB096\\_W1D5 & PRJNA797778 & SRR17858318 & PRJNA797778 & SRR17635757 & human\\_vagina\\\\\n",
+ "\t UAB093\\_W9D6 & PRJNA797778 & SRR17858319 & PRJNA797778 & SRR17635759 & human\\_vagina\\\\\n",
+ "\t UAB093\\_W7D7 & PRJNA797778 & SRR17858320 & PRJNA797778 & SRR17635760 & human\\_vagina\\\\\n",
+ "\t UAB093\\_W5D7 & PRJNA797778 & SRR17858321 & PRJNA797778 & SRR17635761 & human\\_vagina\\\\\n",
+ "\t UAB093\\_W4D2 & PRJNA797778 & SRR17858322 & PRJNA797778 & SRR17635762 & human\\_vagina\\\\\n",
+ "\t UAB093\\_W1D7 & PRJNA797778 & SRR17858323 & PRJNA797778 & SRR17635667 & human\\_vagina\\\\\n",
+ "\t UAB088\\_W10D1 & PRJNA797778 & SRR17858324 & PRJNA797778 & SRR17635668 & human\\_vagina\\\\\n",
+ "\t UAB088\\_W7D6 & PRJNA797778 & SRR17858325 & PRJNA797778 & SRR17635669 & human\\_vagina\\\\\n",
+ "\t UAB088\\_W6D1 & PRJNA797778 & SRR17858326 & PRJNA797778 & SRR17635670 & human\\_vagina\\\\\n",
+ "\t UAB088\\_W4D1 & PRJNA797778 & SRR17858327 & PRJNA797778 & SRR17635671 & human\\_vagina\\\\\n",
+ "\t UAB007\\_W1D7 & PRJNA797778 & SRR17858328 & PRJNA797778 & SRR17635795 & human\\_vagina\\\\\n",
+ "\t AYAC02\\_W1D7 & PRJNA797778 & SRR17858329 & PRJNA797778 & SRR17635794 & human\\_vagina\\\\\n",
+ "\t EM04\\_W3D7 & PRJNA797778 & SRR17858330 & PRJNA797778 & SRR17635711 & human\\_vagina\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 180 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| UAB088_W2D1 | PRJNA797778 | SRR17858151 | PRJNA797778 | SRR17635672 | human_vagina |\n",
+ "| UAB082_W10D2 | PRJNA797778 | SRR17858152 | PRJNA797778 | SRR17635674 | human_vagina |\n",
+ "| UAB082_W5D7 | PRJNA797778 | SRR17858153 | PRJNA797778 | SRR17635676 | human_vagina |\n",
+ "| UAB082_W3D7 | PRJNA797778 | SRR17858154 | PRJNA797778 | SRR17635677 | human_vagina |\n",
+ "| UAB082_W2D5 | PRJNA797778 | SRR17858155 | PRJNA797778 | SRR17635678 | human_vagina |\n",
+ "| UAB079_W10D1 | PRJNA797778 | SRR17858156 | PRJNA797778 | SRR17635679 | human_vagina |\n",
+ "| UAB079_W7D6 | PRJNA797778 | SRR17858157 | PRJNA797778 | SRR17635680 | human_vagina |\n",
+ "| UAB079_W6D7 | PRJNA797778 | SRR17858158 | PRJNA797778 | SRR17635681 | human_vagina |\n",
+ "| UAB079_W3D7 | PRJNA797778 | SRR17858159 | PRJNA797778 | SRR17635682 | human_vagina |\n",
+ "| UAB079_W2D2 | PRJNA797778 | SRR17858160 | PRJNA797778 | SRR17635683 | human_vagina |\n",
+ "| UAB006_W2D2 | PRJNA797778 | SRR17858161 | PRJNA797778 | SRR17635801 | human_vagina |\n",
+ "| UAB077_W9D7 | PRJNA797778 | SRR17858162 | PRJNA797778 | SRR17635685 | human_vagina |\n",
+ "| UAB077_W8D2 | PRJNA797778 | SRR17858163 | PRJNA797778 | SRR17635686 | human_vagina |\n",
+ "| UAB077_W5D7 | PRJNA797778 | SRR17858164 | PRJNA797778 | SRR17635687 | human_vagina |\n",
+ "| UAB077_W3D1 | PRJNA797778 | SRR17858165 | PRJNA797778 | SRR17635688 | human_vagina |\n",
+ "| UAB077_W1D7 | PRJNA797778 | SRR17858166 | PRJNA797778 | SRR17635689 | human_vagina |\n",
+ "| UAB071_W10D2 | PRJNA797778 | SRR17858167 | PRJNA797778 | SRR17635690 | human_vagina |\n",
+ "| UAB071_W7D7 | PRJNA797778 | SRR17858168 | PRJNA797778 | SRR17635691 | human_vagina |\n",
+ "| UAB071_W5D7 | PRJNA797778 | SRR17858169 | PRJNA797778 | SRR17635692 | human_vagina |\n",
+ "| UAB071_W3D7 | PRJNA797778 | SRR17858170 | PRJNA797778 | SRR17635693 | human_vagina |\n",
+ "| UAB071_W1D7 | PRJNA797778 | SRR17858171 | PRJNA797778 | SRR17635694 | human_vagina |\n",
+ "| UAB002_W5D7 | PRJNA797778 | SRR17858172 | PRJNA797778 | SRR17635804 | human_vagina |\n",
+ "| AYAC02_W9D3 | PRJNA797778 | SRR17858173 | PRJNA797778 | SRR17635818 | human_vagina |\n",
+ "| UAB060_W10D2 | PRJNA797778 | SRR17858174 | PRJNA797778 | SRR17635696 | human_vagina |\n",
+ "| UAB060_W7D7 | PRJNA797778 | SRR17858175 | PRJNA797778 | SRR17635697 | human_vagina |\n",
+ "| UAB060_W5D7 | PRJNA797778 | SRR17858176 | PRJNA797778 | SRR17635698 | human_vagina |\n",
+ "| UAB060_W3D7 | PRJNA797778 | SRR17858177 | PRJNA797778 | SRR17635763 | human_vagina |\n",
+ "| UAB060_W1D6 | PRJNA797778 | SRR17858178 | PRJNA797778 | SRR17635764 | human_vagina |\n",
+ "| UAB052_W10D2 | PRJNA797778 | SRR17858179 | PRJNA797778 | SRR17635765 | human_vagina |\n",
+ "| UAB052_W8D3 | PRJNA797778 | SRR17858180 | PRJNA797778 | SRR17635766 | human_vagina |\n",
+ "| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |\n",
+ "| UAB116_W1D7 | PRJNA797778 | SRR17858301 | PRJNA797778 | SRR17635741 | human_vagina |\n",
+ "| UAB110_W10D4 | PRJNA797778 | SRR17858302 | PRJNA797778 | SRR17635742 | human_vagina |\n",
+ "| UAB110_W7D7 | PRJNA797778 | SRR17858303 | PRJNA797778 | SRR17635743 | human_vagina |\n",
+ "| UAB110_W6D2 | PRJNA797778 | SRR17858304 | PRJNA797778 | SRR17635744 | human_vagina |\n",
+ "| UAB110_W3D7 | PRJNA797778 | SRR17858305 | PRJNA797778 | SRR17635745 | human_vagina |\n",
+ "| UAB010_W5D6 | PRJNA797778 | SRR17858306 | PRJNA797778 | SRR17635490 | human_vagina |\n",
+ "| UAB110_W1D7 | PRJNA797778 | SRR17858307 | PRJNA797778 | SRR17635746 | human_vagina |\n",
+ "| UAB106_W9D7 | PRJNA797778 | SRR17858308 | PRJNA797778 | SRR17635748 | human_vagina |\n",
+ "| UAB106_W7D7 | PRJNA797778 | SRR17858309 | PRJNA797778 | SRR17635749 | human_vagina |\n",
+ "| UAB106_W5D4 | PRJNA797778 | SRR17858310 | PRJNA797778 | SRR17635750 | human_vagina |\n",
+ "| UAB106_W3D7 | PRJNA797778 | SRR17858311 | PRJNA797778 | SRR17635751 | human_vagina |\n",
+ "| UAB106_W1D7 | PRJNA797778 | SRR17858312 | PRJNA797778 | SRR17635752 | human_vagina |\n",
+ "| UAB096_W9D7 | PRJNA797778 | SRR17858313 | PRJNA797778 | SRR17635753 | human_vagina |\n",
+ "| UAB096_W7D3 | PRJNA797778 | SRR17858314 | PRJNA797778 | SRR17635754 | human_vagina |\n",
+ "| UAB096_W5D3 | PRJNA797778 | SRR17858315 | PRJNA797778 | SRR17635755 | human_vagina |\n",
+ "| UAB096_W3D3 | PRJNA797778 | SRR17858316 | PRJNA797778 | SRR17635756 | human_vagina |\n",
+ "| UAB010_W3D1 | PRJNA797778 | SRR17858317 | PRJNA797778 | SRR17635491 | human_vagina |\n",
+ "| UAB096_W1D5 | PRJNA797778 | SRR17858318 | PRJNA797778 | SRR17635757 | human_vagina |\n",
+ "| UAB093_W9D6 | PRJNA797778 | SRR17858319 | PRJNA797778 | SRR17635759 | human_vagina |\n",
+ "| UAB093_W7D7 | PRJNA797778 | SRR17858320 | PRJNA797778 | SRR17635760 | human_vagina |\n",
+ "| UAB093_W5D7 | PRJNA797778 | SRR17858321 | PRJNA797778 | SRR17635761 | human_vagina |\n",
+ "| UAB093_W4D2 | PRJNA797778 | SRR17858322 | PRJNA797778 | SRR17635762 | human_vagina |\n",
+ "| UAB093_W1D7 | PRJNA797778 | SRR17858323 | PRJNA797778 | SRR17635667 | human_vagina |\n",
+ "| UAB088_W10D1 | PRJNA797778 | SRR17858324 | PRJNA797778 | SRR17635668 | human_vagina |\n",
+ "| UAB088_W7D6 | PRJNA797778 | SRR17858325 | PRJNA797778 | SRR17635669 | human_vagina |\n",
+ "| UAB088_W6D1 | PRJNA797778 | SRR17858326 | PRJNA797778 | SRR17635670 | human_vagina |\n",
+ "| UAB088_W4D1 | PRJNA797778 | SRR17858327 | PRJNA797778 | SRR17635671 | human_vagina |\n",
+ "| UAB007_W1D7 | PRJNA797778 | SRR17858328 | PRJNA797778 | SRR17635795 | human_vagina |\n",
+ "| AYAC02_W1D7 | PRJNA797778 | SRR17858329 | PRJNA797778 | SRR17635794 | human_vagina |\n",
+ "| EM04_W3D7 | PRJNA797778 | SRR17858330 | PRJNA797778 | SRR17635711 | human_vagina |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 UAB088_W2D1 PRJNA797778 SRR17858151 PRJNA797778 \n",
+ "2 UAB082_W10D2 PRJNA797778 SRR17858152 PRJNA797778 \n",
+ "3 UAB082_W5D7 PRJNA797778 SRR17858153 PRJNA797778 \n",
+ "4 UAB082_W3D7 PRJNA797778 SRR17858154 PRJNA797778 \n",
+ "5 UAB082_W2D5 PRJNA797778 SRR17858155 PRJNA797778 \n",
+ "6 UAB079_W10D1 PRJNA797778 SRR17858156 PRJNA797778 \n",
+ "7 UAB079_W7D6 PRJNA797778 SRR17858157 PRJNA797778 \n",
+ "8 UAB079_W6D7 PRJNA797778 SRR17858158 PRJNA797778 \n",
+ "9 UAB079_W3D7 PRJNA797778 SRR17858159 PRJNA797778 \n",
+ "10 UAB079_W2D2 PRJNA797778 SRR17858160 PRJNA797778 \n",
+ "11 UAB006_W2D2 PRJNA797778 SRR17858161 PRJNA797778 \n",
+ "12 UAB077_W9D7 PRJNA797778 SRR17858162 PRJNA797778 \n",
+ "13 UAB077_W8D2 PRJNA797778 SRR17858163 PRJNA797778 \n",
+ "14 UAB077_W5D7 PRJNA797778 SRR17858164 PRJNA797778 \n",
+ "15 UAB077_W3D1 PRJNA797778 SRR17858165 PRJNA797778 \n",
+ "16 UAB077_W1D7 PRJNA797778 SRR17858166 PRJNA797778 \n",
+ "17 UAB071_W10D2 PRJNA797778 SRR17858167 PRJNA797778 \n",
+ "18 UAB071_W7D7 PRJNA797778 SRR17858168 PRJNA797778 \n",
+ "19 UAB071_W5D7 PRJNA797778 SRR17858169 PRJNA797778 \n",
+ "20 UAB071_W3D7 PRJNA797778 SRR17858170 PRJNA797778 \n",
+ "21 UAB071_W1D7 PRJNA797778 SRR17858171 PRJNA797778 \n",
+ "22 UAB002_W5D7 PRJNA797778 SRR17858172 PRJNA797778 \n",
+ "23 AYAC02_W9D3 PRJNA797778 SRR17858173 PRJNA797778 \n",
+ "24 UAB060_W10D2 PRJNA797778 SRR17858174 PRJNA797778 \n",
+ "25 UAB060_W7D7 PRJNA797778 SRR17858175 PRJNA797778 \n",
+ "26 UAB060_W5D7 PRJNA797778 SRR17858176 PRJNA797778 \n",
+ "27 UAB060_W3D7 PRJNA797778 SRR17858177 PRJNA797778 \n",
+ "28 UAB060_W1D6 PRJNA797778 SRR17858178 PRJNA797778 \n",
+ "29 UAB052_W10D2 PRJNA797778 SRR17858179 PRJNA797778 \n",
+ "30 UAB052_W8D3 PRJNA797778 SRR17858180 PRJNA797778 \n",
+ "⋮ ⋮ ⋮ ⋮ ⋮ \n",
+ "151 UAB116_W1D7 PRJNA797778 SRR17858301 PRJNA797778 \n",
+ "152 UAB110_W10D4 PRJNA797778 SRR17858302 PRJNA797778 \n",
+ "153 UAB110_W7D7 PRJNA797778 SRR17858303 PRJNA797778 \n",
+ "154 UAB110_W6D2 PRJNA797778 SRR17858304 PRJNA797778 \n",
+ "155 UAB110_W3D7 PRJNA797778 SRR17858305 PRJNA797778 \n",
+ "156 UAB010_W5D6 PRJNA797778 SRR17858306 PRJNA797778 \n",
+ "157 UAB110_W1D7 PRJNA797778 SRR17858307 PRJNA797778 \n",
+ "158 UAB106_W9D7 PRJNA797778 SRR17858308 PRJNA797778 \n",
+ "159 UAB106_W7D7 PRJNA797778 SRR17858309 PRJNA797778 \n",
+ "160 UAB106_W5D4 PRJNA797778 SRR17858310 PRJNA797778 \n",
+ "161 UAB106_W3D7 PRJNA797778 SRR17858311 PRJNA797778 \n",
+ "162 UAB106_W1D7 PRJNA797778 SRR17858312 PRJNA797778 \n",
+ "163 UAB096_W9D7 PRJNA797778 SRR17858313 PRJNA797778 \n",
+ "164 UAB096_W7D3 PRJNA797778 SRR17858314 PRJNA797778 \n",
+ "165 UAB096_W5D3 PRJNA797778 SRR17858315 PRJNA797778 \n",
+ "166 UAB096_W3D3 PRJNA797778 SRR17858316 PRJNA797778 \n",
+ "167 UAB010_W3D1 PRJNA797778 SRR17858317 PRJNA797778 \n",
+ "168 UAB096_W1D5 PRJNA797778 SRR17858318 PRJNA797778 \n",
+ "169 UAB093_W9D6 PRJNA797778 SRR17858319 PRJNA797778 \n",
+ "170 UAB093_W7D7 PRJNA797778 SRR17858320 PRJNA797778 \n",
+ "171 UAB093_W5D7 PRJNA797778 SRR17858321 PRJNA797778 \n",
+ "172 UAB093_W4D2 PRJNA797778 SRR17858322 PRJNA797778 \n",
+ "173 UAB093_W1D7 PRJNA797778 SRR17858323 PRJNA797778 \n",
+ "174 UAB088_W10D1 PRJNA797778 SRR17858324 PRJNA797778 \n",
+ "175 UAB088_W7D6 PRJNA797778 SRR17858325 PRJNA797778 \n",
+ "176 UAB088_W6D1 PRJNA797778 SRR17858326 PRJNA797778 \n",
+ "177 UAB088_W4D1 PRJNA797778 SRR17858327 PRJNA797778 \n",
+ "178 UAB007_W1D7 PRJNA797778 SRR17858328 PRJNA797778 \n",
+ "179 AYAC02_W1D7 PRJNA797778 SRR17858329 PRJNA797778 \n",
+ "180 EM04_W3D7 PRJNA797778 SRR17858330 PRJNA797778 \n",
+ " mgx_run_accession sample_type \n",
+ "1 SRR17635672 human_vagina\n",
+ "2 SRR17635674 human_vagina\n",
+ "3 SRR17635676 human_vagina\n",
+ "4 SRR17635677 human_vagina\n",
+ "5 SRR17635678 human_vagina\n",
+ "6 SRR17635679 human_vagina\n",
+ "7 SRR17635680 human_vagina\n",
+ "8 SRR17635681 human_vagina\n",
+ "9 SRR17635682 human_vagina\n",
+ "10 SRR17635683 human_vagina\n",
+ "11 SRR17635801 human_vagina\n",
+ "12 SRR17635685 human_vagina\n",
+ "13 SRR17635686 human_vagina\n",
+ "14 SRR17635687 human_vagina\n",
+ "15 SRR17635688 human_vagina\n",
+ "16 SRR17635689 human_vagina\n",
+ "17 SRR17635690 human_vagina\n",
+ "18 SRR17635691 human_vagina\n",
+ "19 SRR17635692 human_vagina\n",
+ "20 SRR17635693 human_vagina\n",
+ "21 SRR17635694 human_vagina\n",
+ "22 SRR17635804 human_vagina\n",
+ "23 SRR17635818 human_vagina\n",
+ "24 SRR17635696 human_vagina\n",
+ "25 SRR17635697 human_vagina\n",
+ "26 SRR17635698 human_vagina\n",
+ "27 SRR17635763 human_vagina\n",
+ "28 SRR17635764 human_vagina\n",
+ "29 SRR17635765 human_vagina\n",
+ "30 SRR17635766 human_vagina\n",
+ "⋮ ⋮ ⋮ \n",
+ "151 SRR17635741 human_vagina\n",
+ "152 SRR17635742 human_vagina\n",
+ "153 SRR17635743 human_vagina\n",
+ "154 SRR17635744 human_vagina\n",
+ "155 SRR17635745 human_vagina\n",
+ "156 SRR17635490 human_vagina\n",
+ "157 SRR17635746 human_vagina\n",
+ "158 SRR17635748 human_vagina\n",
+ "159 SRR17635749 human_vagina\n",
+ "160 SRR17635750 human_vagina\n",
+ "161 SRR17635751 human_vagina\n",
+ "162 SRR17635752 human_vagina\n",
+ "163 SRR17635753 human_vagina\n",
+ "164 SRR17635754 human_vagina\n",
+ "165 SRR17635755 human_vagina\n",
+ "166 SRR17635756 human_vagina\n",
+ "167 SRR17635491 human_vagina\n",
+ "168 SRR17635757 human_vagina\n",
+ "169 SRR17635759 human_vagina\n",
+ "170 SRR17635760 human_vagina\n",
+ "171 SRR17635761 human_vagina\n",
+ "172 SRR17635762 human_vagina\n",
+ "173 SRR17635667 human_vagina\n",
+ "174 SRR17635668 human_vagina\n",
+ "175 SRR17635669 human_vagina\n",
+ "176 SRR17635670 human_vagina\n",
+ "177 SRR17635671 human_vagina\n",
+ "178 SRR17635795 human_vagina\n",
+ "179 SRR17635794 human_vagina\n",
+ "180 SRR17635711 human_vagina"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "prjna797778_mtx <- prjna797778 %>% \n",
+ " filter(library_source == \"METATRANSCRIPTOMIC\") %>%\n",
+ " select(sample_name = sample_alias,\n",
+ " mtx_study_accession = study_accession,\n",
+ " mtx_run_accession = run_accession)\n",
+ "\n",
+ "prjna797778_mgx <- prjna797778 %>% \n",
+ " filter(library_source == \"METAGENOMIC\") %>%\n",
+ " select(sample_name = sample_alias,\n",
+ " mgx_study_accession = study_accession,\n",
+ " mgx_run_accession = run_accession)\n",
+ "\n",
+ "prjna797778_mtx_vs_mgx <- left_join(prjna797778_mtx, prjna797778_mgx, by = \"sample_name\") %>%\n",
+ " mutate(sample_type = \"human_vagina\")\n",
+ "\n",
+ "prjna797778_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e19c96ba",
+ "metadata": {},
+ "source": [
+ "## PRJNA339914"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "bb6f3136",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna339914 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA339914&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ " show_col_types = F)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "3c6e9e34",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 4 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tMV_FEI4_t2Q15 | PRJNA339914 | SRR4100706 | PRJNA339914 | SRR4052039 | human_gut |
\n",
+ "\tMV_FEI5_t3Q15 | PRJNA339914 | SRR4100707 | PRJNA339914 | SRR4052042 | human_gut |
\n",
+ "\tMV_FEM4_t2Q15 | PRJNA339914 | SRR4100708 | PRJNA339914 | SRR4052025 | human_gut |
\n",
+ "\tMV_FEM5_t3Q15 | PRJNA339914 | SRR4100709 | PRJNA339914 | SRR4052028 | human_gut |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 4 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t MV\\_FEI4\\_t2Q15 & PRJNA339914 & SRR4100706 & PRJNA339914 & SRR4052039 & human\\_gut\\\\\n",
+ "\t MV\\_FEI5\\_t3Q15 & PRJNA339914 & SRR4100707 & PRJNA339914 & SRR4052042 & human\\_gut\\\\\n",
+ "\t MV\\_FEM4\\_t2Q15 & PRJNA339914 & SRR4100708 & PRJNA339914 & SRR4052025 & human\\_gut\\\\\n",
+ "\t MV\\_FEM5\\_t3Q15 & PRJNA339914 & SRR4100709 & PRJNA339914 & SRR4052028 & human\\_gut\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 4 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| MV_FEI4_t2Q15 | PRJNA339914 | SRR4100706 | PRJNA339914 | SRR4052039 | human_gut |\n",
+ "| MV_FEI5_t3Q15 | PRJNA339914 | SRR4100707 | PRJNA339914 | SRR4052042 | human_gut |\n",
+ "| MV_FEM4_t2Q15 | PRJNA339914 | SRR4100708 | PRJNA339914 | SRR4052025 | human_gut |\n",
+ "| MV_FEM5_t3Q15 | PRJNA339914 | SRR4100709 | PRJNA339914 | SRR4052028 | human_gut |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 MV_FEI4_t2Q15 PRJNA339914 SRR4100706 PRJNA339914 \n",
+ "2 MV_FEI5_t3Q15 PRJNA339914 SRR4100707 PRJNA339914 \n",
+ "3 MV_FEM4_t2Q15 PRJNA339914 SRR4100708 PRJNA339914 \n",
+ "4 MV_FEM5_t3Q15 PRJNA339914 SRR4100709 PRJNA339914 \n",
+ " mgx_run_accession sample_type\n",
+ "1 SRR4052039 human_gut \n",
+ "2 SRR4052042 human_gut \n",
+ "3 SRR4052025 human_gut \n",
+ "4 SRR4052028 human_gut "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "prjna339914_mtx <- prjna339914 %>% \n",
+ " filter(library_source == \"METATRANSCRIPTOMIC\") %>%\n",
+ " mutate(sample_name = gsub(\"RNA\", \"\", sample_alias)) %>%\n",
+ " select(sample_name,\n",
+ " mtx_study_accession = study_accession,\n",
+ " mtx_run_accession = run_accession)\n",
+ "\n",
+ "prjna339914_mgx <- prjna339914 %>% \n",
+ " filter(library_source == \"METAGENOMIC\") %>%\n",
+ " select(sample_name = sample_alias,\n",
+ " mgx_study_accession = study_accession,\n",
+ " mgx_run_accession = run_accession)\n",
+ "\n",
+ "prjna339914_mtx_vs_mgx <- left_join(prjna339914_mtx, prjna339914_mgx, by = \"sample_name\") %>%\n",
+ " mutate(sample_type = \"human_gut\")\n",
+ "\n",
+ "prjna339914_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a6999921",
+ "metadata": {},
+ "source": [
+ "## PRJEB33889"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "8d446ea8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjeb33889 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJEB33889&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ " show_col_types = F)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "a4d19362",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 8 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tM1 | PRJEB33889 | ERR3473664 | PRJEB33889 | ERR3473656 | mouse_cecum |
\n",
+ "\tM2 | PRJEB33889 | ERR3473665 | PRJEB33889 | ERR3473657 | mouse_cecum |
\n",
+ "\tM3 | PRJEB33889 | ERR3473666 | PRJEB33889 | ERR3473658 | mouse_cecum |
\n",
+ "\tM4 | PRJEB33889 | ERR3473667 | PRJEB33889 | ERR3473659 | mouse_cecum |
\n",
+ "\tM5 | PRJEB33889 | ERR3473668 | PRJEB33889 | ERR3473660 | mouse_cecum |
\n",
+ "\tM6 | PRJEB33889 | ERR3473669 | PRJEB33889 | ERR3473661 | mouse_cecum |
\n",
+ "\tM7 | PRJEB33889 | ERR3473670 | PRJEB33889 | ERR3473662 | mouse_cecum |
\n",
+ "\tM8 | PRJEB33889 | ERR3473671 | PRJEB33889 | ERR3473663 | mouse_cecum |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 8 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t M1 & PRJEB33889 & ERR3473664 & PRJEB33889 & ERR3473656 & mouse\\_cecum\\\\\n",
+ "\t M2 & PRJEB33889 & ERR3473665 & PRJEB33889 & ERR3473657 & mouse\\_cecum\\\\\n",
+ "\t M3 & PRJEB33889 & ERR3473666 & PRJEB33889 & ERR3473658 & mouse\\_cecum\\\\\n",
+ "\t M4 & PRJEB33889 & ERR3473667 & PRJEB33889 & ERR3473659 & mouse\\_cecum\\\\\n",
+ "\t M5 & PRJEB33889 & ERR3473668 & PRJEB33889 & ERR3473660 & mouse\\_cecum\\\\\n",
+ "\t M6 & PRJEB33889 & ERR3473669 & PRJEB33889 & ERR3473661 & mouse\\_cecum\\\\\n",
+ "\t M7 & PRJEB33889 & ERR3473670 & PRJEB33889 & ERR3473662 & mouse\\_cecum\\\\\n",
+ "\t M8 & PRJEB33889 & ERR3473671 & PRJEB33889 & ERR3473663 & mouse\\_cecum\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 8 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| M1 | PRJEB33889 | ERR3473664 | PRJEB33889 | ERR3473656 | mouse_cecum |\n",
+ "| M2 | PRJEB33889 | ERR3473665 | PRJEB33889 | ERR3473657 | mouse_cecum |\n",
+ "| M3 | PRJEB33889 | ERR3473666 | PRJEB33889 | ERR3473658 | mouse_cecum |\n",
+ "| M4 | PRJEB33889 | ERR3473667 | PRJEB33889 | ERR3473659 | mouse_cecum |\n",
+ "| M5 | PRJEB33889 | ERR3473668 | PRJEB33889 | ERR3473660 | mouse_cecum |\n",
+ "| M6 | PRJEB33889 | ERR3473669 | PRJEB33889 | ERR3473661 | mouse_cecum |\n",
+ "| M7 | PRJEB33889 | ERR3473670 | PRJEB33889 | ERR3473662 | mouse_cecum |\n",
+ "| M8 | PRJEB33889 | ERR3473671 | PRJEB33889 | ERR3473663 | mouse_cecum |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 M1 PRJEB33889 ERR3473664 PRJEB33889 \n",
+ "2 M2 PRJEB33889 ERR3473665 PRJEB33889 \n",
+ "3 M3 PRJEB33889 ERR3473666 PRJEB33889 \n",
+ "4 M4 PRJEB33889 ERR3473667 PRJEB33889 \n",
+ "5 M5 PRJEB33889 ERR3473668 PRJEB33889 \n",
+ "6 M6 PRJEB33889 ERR3473669 PRJEB33889 \n",
+ "7 M7 PRJEB33889 ERR3473670 PRJEB33889 \n",
+ "8 M8 PRJEB33889 ERR3473671 PRJEB33889 \n",
+ " mgx_run_accession sample_type\n",
+ "1 ERR3473656 mouse_cecum\n",
+ "2 ERR3473657 mouse_cecum\n",
+ "3 ERR3473658 mouse_cecum\n",
+ "4 ERR3473659 mouse_cecum\n",
+ "5 ERR3473660 mouse_cecum\n",
+ "6 ERR3473661 mouse_cecum\n",
+ "7 ERR3473662 mouse_cecum\n",
+ "8 ERR3473663 mouse_cecum"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "prjeb33889_mtx <- prjeb33889 %>% \n",
+ " filter(library_source == \"METATRANSCRIPTOMIC\") %>%\n",
+ " mutate(sample_name = gsub(\"T\", \"\", sample_alias)) %>%\n",
+ " select(sample_name,\n",
+ " mtx_study_accession = study_accession,\n",
+ " mtx_run_accession = run_accession)\n",
+ "\n",
+ "prjeb33889_mgx <- prjeb33889 %>% \n",
+ " filter(library_source == \"METAGENOMIC\") %>%\n",
+ " filter(!grepl(\"16S\", sample_alias)) %>%\n",
+ " mutate(sample_name = gsub(\"G\", \"\", sample_alias)) %>%\n",
+ " select(sample_name,\n",
+ " mgx_study_accession = study_accession,\n",
+ " mgx_run_accession = run_accession)\n",
+ "\n",
+ "prjeb33889_mtx_vs_mgx <- left_join(prjeb33889_mtx, prjeb33889_mgx, by = \"sample_name\") %>%\n",
+ " mutate(sample_type = \"mouse_cecum\")\n",
+ "\n",
+ "prjeb33889_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f04c78a3",
+ "metadata": {},
+ "source": [
+ "## PRJNA698464"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "7fcee7aa",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna698464 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA698464&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ " show_col_types = F)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "ccada9e6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 4 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tad_supplemented_with_nitrogen_data_2 | PRJNA698464 | SRR13618127 | PRJNA698464 | SRR13618123 | bioreactor |
\n",
+ "\tad_supplemented_with_nitrogen_data_1 | PRJNA698464 | SRR13618128 | PRJNA698464 | SRR13618124 | bioreactor |
\n",
+ "\tad_supplemented_with_hydrogen_data_2 | PRJNA698464 | SRR13618129 | PRJNA698464 | SRR13618125 | bioreactor |
\n",
+ "\tad_supplemented_with_hydrogen_data_1 | PRJNA698464 | SRR13618130 | PRJNA698464 | SRR13618126 | bioreactor |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 4 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t ad\\_supplemented\\_with\\_nitrogen\\_data\\_2 & PRJNA698464 & SRR13618127 & PRJNA698464 & SRR13618123 & bioreactor\\\\\n",
+ "\t ad\\_supplemented\\_with\\_nitrogen\\_data\\_1 & PRJNA698464 & SRR13618128 & PRJNA698464 & SRR13618124 & bioreactor\\\\\n",
+ "\t ad\\_supplemented\\_with\\_hydrogen\\_data\\_2 & PRJNA698464 & SRR13618129 & PRJNA698464 & SRR13618125 & bioreactor\\\\\n",
+ "\t ad\\_supplemented\\_with\\_hydrogen\\_data\\_1 & PRJNA698464 & SRR13618130 & PRJNA698464 & SRR13618126 & bioreactor\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 4 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| ad_supplemented_with_nitrogen_data_2 | PRJNA698464 | SRR13618127 | PRJNA698464 | SRR13618123 | bioreactor |\n",
+ "| ad_supplemented_with_nitrogen_data_1 | PRJNA698464 | SRR13618128 | PRJNA698464 | SRR13618124 | bioreactor |\n",
+ "| ad_supplemented_with_hydrogen_data_2 | PRJNA698464 | SRR13618129 | PRJNA698464 | SRR13618125 | bioreactor |\n",
+ "| ad_supplemented_with_hydrogen_data_1 | PRJNA698464 | SRR13618130 | PRJNA698464 | SRR13618126 | bioreactor |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession\n",
+ "1 ad_supplemented_with_nitrogen_data_2 PRJNA698464 SRR13618127 \n",
+ "2 ad_supplemented_with_nitrogen_data_1 PRJNA698464 SRR13618128 \n",
+ "3 ad_supplemented_with_hydrogen_data_2 PRJNA698464 SRR13618129 \n",
+ "4 ad_supplemented_with_hydrogen_data_1 PRJNA698464 SRR13618130 \n",
+ " mgx_study_accession mgx_run_accession sample_type\n",
+ "1 PRJNA698464 SRR13618123 bioreactor \n",
+ "2 PRJNA698464 SRR13618124 bioreactor \n",
+ "3 PRJNA698464 SRR13618125 bioreactor \n",
+ "4 PRJNA698464 SRR13618126 bioreactor "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "prjna698464_mtx <- prjna698464 %>% \n",
+ " filter(library_source == \"METATRANSCRIPTOMIC\") %>%\n",
+ " mutate(sample_name = gsub(\"Illumina MiSeq sequencing; \", \"\", experiment_title),\n",
+ " sample_name = gsub(\"RNA\", \"\", sample_name),\n",
+ " sample_name = make_clean_names(sample_name)) %>%\n",
+ " select(sample_name,\n",
+ " mtx_study_accession = study_accession,\n",
+ " mtx_run_accession = run_accession)\n",
+ "\n",
+ "prjna698464_mgx <- prjna698464 %>% \n",
+ " filter(library_source == \"METAGENOMIC\") %>%\n",
+ " mutate(sample_name = gsub(\"Illumina MiSeq sequencing; \", \"\", experiment_title),\n",
+ " sample_name = gsub(\"DNA\", \"\", sample_name),\n",
+ " sample_name = make_clean_names(sample_name)) %>%\n",
+ " select(sample_name,\n",
+ " mgx_study_accession = study_accession,\n",
+ " mgx_run_accession = run_accession)\n",
+ "\n",
+ "prjna698464_mtx_vs_mgx <- left_join(prjna698464_mtx, prjna698464_mgx, by = \"sample_name\") %>%\n",
+ " mutate(sample_type = \"bioreactor\")\n",
+ "\n",
+ "prjna698464_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e51f3b6d",
+ "metadata": {},
+ "source": [
+ "## PRJNA396840"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "id": "d841dd17",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna396840 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA396840&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ " show_col_types = F)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "a6864499",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 30 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tX4074 | PRJNA396840 | SRR5892181 | PRJNA396840 | SRR5892217 | human_oral |
\n",
+ "\tX4080 | PRJNA396840 | SRR5892182 | PRJNA396840 | SRR5892216 | human_oral |
\n",
+ "\tX4068 | PRJNA396840 | SRR5892183 | PRJNA396840 | SRR5892215 | human_oral |
\n",
+ "\tX4072 | PRJNA396840 | SRR5892184 | PRJNA396840 | SRR5892214 | human_oral |
\n",
+ "\tX4060 | PRJNA396840 | SRR5892185 | PRJNA396840 | SRR5892213 | human_oral |
\n",
+ "\tX4064 | PRJNA396840 | SRR5892186 | PRJNA396840 | SRR5892212 | human_oral |
\n",
+ "\tX4050 | PRJNA396840 | SRR5892187 | PRJNA396840 | SRR5892211 | human_oral |
\n",
+ "\tX4056 | PRJNA396840 | SRR5892188 | PRJNA396840 | SRR5892210 | human_oral |
\n",
+ "\tX4108 | PRJNA396840 | SRR5892189 | PRJNA396840 | SRR5892233 | human_oral |
\n",
+ "\tX4082 | PRJNA396840 | SRR5892190 | PRJNA396840 | SRR5892209 | human_oral |
\n",
+ "\tX4088 | PRJNA396840 | SRR5892191 | PRJNA396840 | SRR5892208 | human_oral |
\n",
+ "\tX4124 | PRJNA396840 | SRR5892192 | PRJNA396840 | SRR5892236 | human_oral |
\n",
+ "\tX4120 | PRJNA396840 | SRR5892195 | PRJNA396840 | SRR5892235 | human_oral |
\n",
+ "\tX4096 | PRJNA396840 | SRR5892200 | PRJNA396840 | SRR5892238 | human_oral |
\n",
+ "\tX4092 | PRJNA396840 | SRR5892204 | PRJNA396840 | SRR5892237 | human_oral |
\n",
+ "\tX4104 | PRJNA396840 | SRR5892205 | PRJNA396840 | SRR5892240 | human_oral |
\n",
+ "\tX4100 | PRJNA396840 | SRR5892207 | PRJNA396840 | SRR5892239 | human_oral |
\n",
+ "\tX4024 | PRJNA396840 | SRR5892218 | PRJNA396840 | SRR5892196 | human_oral |
\n",
+ "\tX4020 | PRJNA396840 | SRR5892219 | PRJNA396840 | SRR5892197 | human_oral |
\n",
+ "\tX4014 | PRJNA396840 | SRR5892220 | PRJNA396840 | SRR5892198 | human_oral |
\n",
+ "\tX4012 | PRJNA396840 | SRR5892221 | PRJNA396840 | SRR5892199 | human_oral |
\n",
+ "\tX4040 | PRJNA396840 | SRR5892222 | PRJNA396840 | SRR5892206 | human_oral |
\n",
+ "\tX4036 | PRJNA396840 | SRR5892223 | PRJNA396840 | SRR5892201 | human_oral |
\n",
+ "\tX4032 | PRJNA396840 | SRR5892224 | PRJNA396840 | SRR5892202 | human_oral |
\n",
+ "\tX4028 | PRJNA396840 | SRR5892225 | PRJNA396840 | SRR5892203 | human_oral |
\n",
+ "\tX4048 | PRJNA396840 | SRR5892226 | PRJNA396840 | SRR5892193 | human_oral |
\n",
+ "\tX4044 | PRJNA396840 | SRR5892227 | PRJNA396840 | SRR5892194 | human_oral |
\n",
+ "\tX4132 | PRJNA396840 | SRR5892228 | PRJNA396840 | SRR5892232 | human_oral |
\n",
+ "\tX4128 | PRJNA396840 | SRR5892229 | PRJNA396840 | SRR5892231 | human_oral |
\n",
+ "\tX4116 | PRJNA396840 | SRR5892230 | PRJNA396840 | SRR5892234 | human_oral |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 30 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t X4074 & PRJNA396840 & SRR5892181 & PRJNA396840 & SRR5892217 & human\\_oral\\\\\n",
+ "\t X4080 & PRJNA396840 & SRR5892182 & PRJNA396840 & SRR5892216 & human\\_oral\\\\\n",
+ "\t X4068 & PRJNA396840 & SRR5892183 & PRJNA396840 & SRR5892215 & human\\_oral\\\\\n",
+ "\t X4072 & PRJNA396840 & SRR5892184 & PRJNA396840 & SRR5892214 & human\\_oral\\\\\n",
+ "\t X4060 & PRJNA396840 & SRR5892185 & PRJNA396840 & SRR5892213 & human\\_oral\\\\\n",
+ "\t X4064 & PRJNA396840 & SRR5892186 & PRJNA396840 & SRR5892212 & human\\_oral\\\\\n",
+ "\t X4050 & PRJNA396840 & SRR5892187 & PRJNA396840 & SRR5892211 & human\\_oral\\\\\n",
+ "\t X4056 & PRJNA396840 & SRR5892188 & PRJNA396840 & SRR5892210 & human\\_oral\\\\\n",
+ "\t X4108 & PRJNA396840 & SRR5892189 & PRJNA396840 & SRR5892233 & human\\_oral\\\\\n",
+ "\t X4082 & PRJNA396840 & SRR5892190 & PRJNA396840 & SRR5892209 & human\\_oral\\\\\n",
+ "\t X4088 & PRJNA396840 & SRR5892191 & PRJNA396840 & SRR5892208 & human\\_oral\\\\\n",
+ "\t X4124 & PRJNA396840 & SRR5892192 & PRJNA396840 & SRR5892236 & human\\_oral\\\\\n",
+ "\t X4120 & PRJNA396840 & SRR5892195 & PRJNA396840 & SRR5892235 & human\\_oral\\\\\n",
+ "\t X4096 & PRJNA396840 & SRR5892200 & PRJNA396840 & SRR5892238 & human\\_oral\\\\\n",
+ "\t X4092 & PRJNA396840 & SRR5892204 & PRJNA396840 & SRR5892237 & human\\_oral\\\\\n",
+ "\t X4104 & PRJNA396840 & SRR5892205 & PRJNA396840 & SRR5892240 & human\\_oral\\\\\n",
+ "\t X4100 & PRJNA396840 & SRR5892207 & PRJNA396840 & SRR5892239 & human\\_oral\\\\\n",
+ "\t X4024 & PRJNA396840 & SRR5892218 & PRJNA396840 & SRR5892196 & human\\_oral\\\\\n",
+ "\t X4020 & PRJNA396840 & SRR5892219 & PRJNA396840 & SRR5892197 & human\\_oral\\\\\n",
+ "\t X4014 & PRJNA396840 & SRR5892220 & PRJNA396840 & SRR5892198 & human\\_oral\\\\\n",
+ "\t X4012 & PRJNA396840 & SRR5892221 & PRJNA396840 & SRR5892199 & human\\_oral\\\\\n",
+ "\t X4040 & PRJNA396840 & SRR5892222 & PRJNA396840 & SRR5892206 & human\\_oral\\\\\n",
+ "\t X4036 & PRJNA396840 & SRR5892223 & PRJNA396840 & SRR5892201 & human\\_oral\\\\\n",
+ "\t X4032 & PRJNA396840 & SRR5892224 & PRJNA396840 & SRR5892202 & human\\_oral\\\\\n",
+ "\t X4028 & PRJNA396840 & SRR5892225 & PRJNA396840 & SRR5892203 & human\\_oral\\\\\n",
+ "\t X4048 & PRJNA396840 & SRR5892226 & PRJNA396840 & SRR5892193 & human\\_oral\\\\\n",
+ "\t X4044 & PRJNA396840 & SRR5892227 & PRJNA396840 & SRR5892194 & human\\_oral\\\\\n",
+ "\t X4132 & PRJNA396840 & SRR5892228 & PRJNA396840 & SRR5892232 & human\\_oral\\\\\n",
+ "\t X4128 & PRJNA396840 & SRR5892229 & PRJNA396840 & SRR5892231 & human\\_oral\\\\\n",
+ "\t X4116 & PRJNA396840 & SRR5892230 & PRJNA396840 & SRR5892234 & human\\_oral\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 30 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| X4074 | PRJNA396840 | SRR5892181 | PRJNA396840 | SRR5892217 | human_oral |\n",
+ "| X4080 | PRJNA396840 | SRR5892182 | PRJNA396840 | SRR5892216 | human_oral |\n",
+ "| X4068 | PRJNA396840 | SRR5892183 | PRJNA396840 | SRR5892215 | human_oral |\n",
+ "| X4072 | PRJNA396840 | SRR5892184 | PRJNA396840 | SRR5892214 | human_oral |\n",
+ "| X4060 | PRJNA396840 | SRR5892185 | PRJNA396840 | SRR5892213 | human_oral |\n",
+ "| X4064 | PRJNA396840 | SRR5892186 | PRJNA396840 | SRR5892212 | human_oral |\n",
+ "| X4050 | PRJNA396840 | SRR5892187 | PRJNA396840 | SRR5892211 | human_oral |\n",
+ "| X4056 | PRJNA396840 | SRR5892188 | PRJNA396840 | SRR5892210 | human_oral |\n",
+ "| X4108 | PRJNA396840 | SRR5892189 | PRJNA396840 | SRR5892233 | human_oral |\n",
+ "| X4082 | PRJNA396840 | SRR5892190 | PRJNA396840 | SRR5892209 | human_oral |\n",
+ "| X4088 | PRJNA396840 | SRR5892191 | PRJNA396840 | SRR5892208 | human_oral |\n",
+ "| X4124 | PRJNA396840 | SRR5892192 | PRJNA396840 | SRR5892236 | human_oral |\n",
+ "| X4120 | PRJNA396840 | SRR5892195 | PRJNA396840 | SRR5892235 | human_oral |\n",
+ "| X4096 | PRJNA396840 | SRR5892200 | PRJNA396840 | SRR5892238 | human_oral |\n",
+ "| X4092 | PRJNA396840 | SRR5892204 | PRJNA396840 | SRR5892237 | human_oral |\n",
+ "| X4104 | PRJNA396840 | SRR5892205 | PRJNA396840 | SRR5892240 | human_oral |\n",
+ "| X4100 | PRJNA396840 | SRR5892207 | PRJNA396840 | SRR5892239 | human_oral |\n",
+ "| X4024 | PRJNA396840 | SRR5892218 | PRJNA396840 | SRR5892196 | human_oral |\n",
+ "| X4020 | PRJNA396840 | SRR5892219 | PRJNA396840 | SRR5892197 | human_oral |\n",
+ "| X4014 | PRJNA396840 | SRR5892220 | PRJNA396840 | SRR5892198 | human_oral |\n",
+ "| X4012 | PRJNA396840 | SRR5892221 | PRJNA396840 | SRR5892199 | human_oral |\n",
+ "| X4040 | PRJNA396840 | SRR5892222 | PRJNA396840 | SRR5892206 | human_oral |\n",
+ "| X4036 | PRJNA396840 | SRR5892223 | PRJNA396840 | SRR5892201 | human_oral |\n",
+ "| X4032 | PRJNA396840 | SRR5892224 | PRJNA396840 | SRR5892202 | human_oral |\n",
+ "| X4028 | PRJNA396840 | SRR5892225 | PRJNA396840 | SRR5892203 | human_oral |\n",
+ "| X4048 | PRJNA396840 | SRR5892226 | PRJNA396840 | SRR5892193 | human_oral |\n",
+ "| X4044 | PRJNA396840 | SRR5892227 | PRJNA396840 | SRR5892194 | human_oral |\n",
+ "| X4132 | PRJNA396840 | SRR5892228 | PRJNA396840 | SRR5892232 | human_oral |\n",
+ "| X4128 | PRJNA396840 | SRR5892229 | PRJNA396840 | SRR5892231 | human_oral |\n",
+ "| X4116 | PRJNA396840 | SRR5892230 | PRJNA396840 | SRR5892234 | human_oral |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 X4074 PRJNA396840 SRR5892181 PRJNA396840 \n",
+ "2 X4080 PRJNA396840 SRR5892182 PRJNA396840 \n",
+ "3 X4068 PRJNA396840 SRR5892183 PRJNA396840 \n",
+ "4 X4072 PRJNA396840 SRR5892184 PRJNA396840 \n",
+ "5 X4060 PRJNA396840 SRR5892185 PRJNA396840 \n",
+ "6 X4064 PRJNA396840 SRR5892186 PRJNA396840 \n",
+ "7 X4050 PRJNA396840 SRR5892187 PRJNA396840 \n",
+ "8 X4056 PRJNA396840 SRR5892188 PRJNA396840 \n",
+ "9 X4108 PRJNA396840 SRR5892189 PRJNA396840 \n",
+ "10 X4082 PRJNA396840 SRR5892190 PRJNA396840 \n",
+ "11 X4088 PRJNA396840 SRR5892191 PRJNA396840 \n",
+ "12 X4124 PRJNA396840 SRR5892192 PRJNA396840 \n",
+ "13 X4120 PRJNA396840 SRR5892195 PRJNA396840 \n",
+ "14 X4096 PRJNA396840 SRR5892200 PRJNA396840 \n",
+ "15 X4092 PRJNA396840 SRR5892204 PRJNA396840 \n",
+ "16 X4104 PRJNA396840 SRR5892205 PRJNA396840 \n",
+ "17 X4100 PRJNA396840 SRR5892207 PRJNA396840 \n",
+ "18 X4024 PRJNA396840 SRR5892218 PRJNA396840 \n",
+ "19 X4020 PRJNA396840 SRR5892219 PRJNA396840 \n",
+ "20 X4014 PRJNA396840 SRR5892220 PRJNA396840 \n",
+ "21 X4012 PRJNA396840 SRR5892221 PRJNA396840 \n",
+ "22 X4040 PRJNA396840 SRR5892222 PRJNA396840 \n",
+ "23 X4036 PRJNA396840 SRR5892223 PRJNA396840 \n",
+ "24 X4032 PRJNA396840 SRR5892224 PRJNA396840 \n",
+ "25 X4028 PRJNA396840 SRR5892225 PRJNA396840 \n",
+ "26 X4048 PRJNA396840 SRR5892226 PRJNA396840 \n",
+ "27 X4044 PRJNA396840 SRR5892227 PRJNA396840 \n",
+ "28 X4132 PRJNA396840 SRR5892228 PRJNA396840 \n",
+ "29 X4128 PRJNA396840 SRR5892229 PRJNA396840 \n",
+ "30 X4116 PRJNA396840 SRR5892230 PRJNA396840 \n",
+ " mgx_run_accession sample_type\n",
+ "1 SRR5892217 human_oral \n",
+ "2 SRR5892216 human_oral \n",
+ "3 SRR5892215 human_oral \n",
+ "4 SRR5892214 human_oral \n",
+ "5 SRR5892213 human_oral \n",
+ "6 SRR5892212 human_oral \n",
+ "7 SRR5892211 human_oral \n",
+ "8 SRR5892210 human_oral \n",
+ "9 SRR5892233 human_oral \n",
+ "10 SRR5892209 human_oral \n",
+ "11 SRR5892208 human_oral \n",
+ "12 SRR5892236 human_oral \n",
+ "13 SRR5892235 human_oral \n",
+ "14 SRR5892238 human_oral \n",
+ "15 SRR5892237 human_oral \n",
+ "16 SRR5892240 human_oral \n",
+ "17 SRR5892239 human_oral \n",
+ "18 SRR5892196 human_oral \n",
+ "19 SRR5892197 human_oral \n",
+ "20 SRR5892198 human_oral \n",
+ "21 SRR5892199 human_oral \n",
+ "22 SRR5892206 human_oral \n",
+ "23 SRR5892201 human_oral \n",
+ "24 SRR5892202 human_oral \n",
+ "25 SRR5892203 human_oral \n",
+ "26 SRR5892193 human_oral \n",
+ "27 SRR5892194 human_oral \n",
+ "28 SRR5892232 human_oral \n",
+ "29 SRR5892231 human_oral \n",
+ "30 SRR5892234 human_oral "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "prjna396840_mtx <- prjna396840 %>% \n",
+ " filter(library_source == \"METATRANSCRIPTOMIC\") %>%\n",
+ " mutate(sample_name = gsub(\"RNA\", \"\", sample_alias)) %>%\n",
+ " select(sample_name,\n",
+ " mtx_study_accession = study_accession,\n",
+ " mtx_run_accession = run_accession)\n",
+ "\n",
+ "prjna396840_mgx <- prjna396840 %>% \n",
+ " filter(library_source == \"METAGENOMIC\") %>%\n",
+ " mutate(sample_name = gsub(\"DNA\", \"\", sample_alias)) %>%\n",
+ " select(sample_name,\n",
+ " mgx_study_accession = study_accession,\n",
+ " mgx_run_accession = run_accession)\n",
+ "\n",
+ "prjna396840_mtx_vs_mgx <- left_join(prjna396840_mtx, prjna396840_mgx, by = \"sample_name\") %>%\n",
+ " mutate(sample_type = \"human_oral\")\n",
+ "\n",
+ "prjna396840_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "52d148ea",
+ "metadata": {},
+ "source": [
+ "## PRJNA492158"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "f946d7cd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna492158 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA492158&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ " show_col_types = F)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "id": "c5490b6a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 26 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tL2 | PRJNA492158 | SRR7880350 | PRJNA492158 | SRR7880198 | human_gut |
\n",
+ "\tL3 | PRJNA492158 | SRR7880351 | PRJNA492158 | SRR7880201 | human_gut |
\n",
+ "\tL5 | PRJNA492158 | SRR7880352 | PRJNA492158 | SRR7880203 | human_gut |
\n",
+ "\tL6 | PRJNA492158 | SRR7880353 | PRJNA492158 | SRR7880202 | human_gut |
\n",
+ "\tL7 | PRJNA492158 | SRR7880354 | PRJNA492158 | SRR7880205 | human_gut |
\n",
+ "\tL8 | PRJNA492158 | SRR7880355 | PRJNA492158 | SRR7880204 | human_gut |
\n",
+ "\tL9 | PRJNA492158 | SRR7880356 | PRJNA492158 | SRR7880207 | human_gut |
\n",
+ "\tL11 | PRJNA492158 | SRR7880357 | PRJNA492158 | SRR7880227 | human_gut |
\n",
+ "\tL12 | PRJNA492158 | SRR7880358 | PRJNA492158 | SRR7880226 | human_gut |
\n",
+ "\tO1 | PRJNA492158 | SRR7880359 | PRJNA492158 | SRR7880225 | human_gut |
\n",
+ "\tV6 | PRJNA492158 | SRR7880360 | PRJNA492158 | SRR7880217 | human_gut |
\n",
+ "\tV5 | PRJNA492158 | SRR7880361 | PRJNA492158 | SRR7880216 | human_gut |
\n",
+ "\tV8 | PRJNA492158 | SRR7880362 | PRJNA492158 | SRR7880218 | human_gut |
\n",
+ "\tV7 | PRJNA492158 | SRR7880363 | PRJNA492158 | SRR7880219 | human_gut |
\n",
+ "\tV11 | PRJNA492158 | SRR7880364 | PRJNA492158 | SRR7880223 | human_gut |
\n",
+ "\tV9 | PRJNA492158 | SRR7880365 | PRJNA492158 | SRR7880197 | human_gut |
\n",
+ "\tO6 | PRJNA492158 | SRR7880366 | PRJNA492158 | SRR7880228 | human_gut |
\n",
+ "\tO8 | PRJNA492158 | SRR7880367 | PRJNA492158 | SRR7880220 | human_gut |
\n",
+ "\tO9 | PRJNA492158 | SRR7880368 | PRJNA492158 | SRR7880210 | human_gut |
\n",
+ "\tO10 | PRJNA492158 | SRR7880369 | PRJNA492158 | SRR7880211 | human_gut |
\n",
+ "\tO2 | PRJNA492158 | SRR7880370 | PRJNA492158 | SRR7880224 | human_gut |
\n",
+ "\tO3 | PRJNA492158 | SRR7880371 | PRJNA492158 | SRR7880231 | human_gut |
\n",
+ "\tO4 | PRJNA492158 | SRR7880372 | PRJNA492158 | SRR7880230 | human_gut |
\n",
+ "\tO5 | PRJNA492158 | SRR7880373 | PRJNA492158 | SRR7880229 | human_gut |
\n",
+ "\tO11 | PRJNA492158 | SRR7880374 | PRJNA492158 | SRR7880208 | human_gut |
\n",
+ "\tO12 | PRJNA492158 | SRR7880375 | PRJNA492158 | SRR7880209 | human_gut |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 26 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t L2 & PRJNA492158 & SRR7880350 & PRJNA492158 & SRR7880198 & human\\_gut\\\\\n",
+ "\t L3 & PRJNA492158 & SRR7880351 & PRJNA492158 & SRR7880201 & human\\_gut\\\\\n",
+ "\t L5 & PRJNA492158 & SRR7880352 & PRJNA492158 & SRR7880203 & human\\_gut\\\\\n",
+ "\t L6 & PRJNA492158 & SRR7880353 & PRJNA492158 & SRR7880202 & human\\_gut\\\\\n",
+ "\t L7 & PRJNA492158 & SRR7880354 & PRJNA492158 & SRR7880205 & human\\_gut\\\\\n",
+ "\t L8 & PRJNA492158 & SRR7880355 & PRJNA492158 & SRR7880204 & human\\_gut\\\\\n",
+ "\t L9 & PRJNA492158 & SRR7880356 & PRJNA492158 & SRR7880207 & human\\_gut\\\\\n",
+ "\t L11 & PRJNA492158 & SRR7880357 & PRJNA492158 & SRR7880227 & human\\_gut\\\\\n",
+ "\t L12 & PRJNA492158 & SRR7880358 & PRJNA492158 & SRR7880226 & human\\_gut\\\\\n",
+ "\t O1 & PRJNA492158 & SRR7880359 & PRJNA492158 & SRR7880225 & human\\_gut\\\\\n",
+ "\t V6 & PRJNA492158 & SRR7880360 & PRJNA492158 & SRR7880217 & human\\_gut\\\\\n",
+ "\t V5 & PRJNA492158 & SRR7880361 & PRJNA492158 & SRR7880216 & human\\_gut\\\\\n",
+ "\t V8 & PRJNA492158 & SRR7880362 & PRJNA492158 & SRR7880218 & human\\_gut\\\\\n",
+ "\t V7 & PRJNA492158 & SRR7880363 & PRJNA492158 & SRR7880219 & human\\_gut\\\\\n",
+ "\t V11 & PRJNA492158 & SRR7880364 & PRJNA492158 & SRR7880223 & human\\_gut\\\\\n",
+ "\t V9 & PRJNA492158 & SRR7880365 & PRJNA492158 & SRR7880197 & human\\_gut\\\\\n",
+ "\t O6 & PRJNA492158 & SRR7880366 & PRJNA492158 & SRR7880228 & human\\_gut\\\\\n",
+ "\t O8 & PRJNA492158 & SRR7880367 & PRJNA492158 & SRR7880220 & human\\_gut\\\\\n",
+ "\t O9 & PRJNA492158 & SRR7880368 & PRJNA492158 & SRR7880210 & human\\_gut\\\\\n",
+ "\t O10 & PRJNA492158 & SRR7880369 & PRJNA492158 & SRR7880211 & human\\_gut\\\\\n",
+ "\t O2 & PRJNA492158 & SRR7880370 & PRJNA492158 & SRR7880224 & human\\_gut\\\\\n",
+ "\t O3 & PRJNA492158 & SRR7880371 & PRJNA492158 & SRR7880231 & human\\_gut\\\\\n",
+ "\t O4 & PRJNA492158 & SRR7880372 & PRJNA492158 & SRR7880230 & human\\_gut\\\\\n",
+ "\t O5 & PRJNA492158 & SRR7880373 & PRJNA492158 & SRR7880229 & human\\_gut\\\\\n",
+ "\t O11 & PRJNA492158 & SRR7880374 & PRJNA492158 & SRR7880208 & human\\_gut\\\\\n",
+ "\t O12 & PRJNA492158 & SRR7880375 & PRJNA492158 & SRR7880209 & human\\_gut\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 26 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| L2 | PRJNA492158 | SRR7880350 | PRJNA492158 | SRR7880198 | human_gut |\n",
+ "| L3 | PRJNA492158 | SRR7880351 | PRJNA492158 | SRR7880201 | human_gut |\n",
+ "| L5 | PRJNA492158 | SRR7880352 | PRJNA492158 | SRR7880203 | human_gut |\n",
+ "| L6 | PRJNA492158 | SRR7880353 | PRJNA492158 | SRR7880202 | human_gut |\n",
+ "| L7 | PRJNA492158 | SRR7880354 | PRJNA492158 | SRR7880205 | human_gut |\n",
+ "| L8 | PRJNA492158 | SRR7880355 | PRJNA492158 | SRR7880204 | human_gut |\n",
+ "| L9 | PRJNA492158 | SRR7880356 | PRJNA492158 | SRR7880207 | human_gut |\n",
+ "| L11 | PRJNA492158 | SRR7880357 | PRJNA492158 | SRR7880227 | human_gut |\n",
+ "| L12 | PRJNA492158 | SRR7880358 | PRJNA492158 | SRR7880226 | human_gut |\n",
+ "| O1 | PRJNA492158 | SRR7880359 | PRJNA492158 | SRR7880225 | human_gut |\n",
+ "| V6 | PRJNA492158 | SRR7880360 | PRJNA492158 | SRR7880217 | human_gut |\n",
+ "| V5 | PRJNA492158 | SRR7880361 | PRJNA492158 | SRR7880216 | human_gut |\n",
+ "| V8 | PRJNA492158 | SRR7880362 | PRJNA492158 | SRR7880218 | human_gut |\n",
+ "| V7 | PRJNA492158 | SRR7880363 | PRJNA492158 | SRR7880219 | human_gut |\n",
+ "| V11 | PRJNA492158 | SRR7880364 | PRJNA492158 | SRR7880223 | human_gut |\n",
+ "| V9 | PRJNA492158 | SRR7880365 | PRJNA492158 | SRR7880197 | human_gut |\n",
+ "| O6 | PRJNA492158 | SRR7880366 | PRJNA492158 | SRR7880228 | human_gut |\n",
+ "| O8 | PRJNA492158 | SRR7880367 | PRJNA492158 | SRR7880220 | human_gut |\n",
+ "| O9 | PRJNA492158 | SRR7880368 | PRJNA492158 | SRR7880210 | human_gut |\n",
+ "| O10 | PRJNA492158 | SRR7880369 | PRJNA492158 | SRR7880211 | human_gut |\n",
+ "| O2 | PRJNA492158 | SRR7880370 | PRJNA492158 | SRR7880224 | human_gut |\n",
+ "| O3 | PRJNA492158 | SRR7880371 | PRJNA492158 | SRR7880231 | human_gut |\n",
+ "| O4 | PRJNA492158 | SRR7880372 | PRJNA492158 | SRR7880230 | human_gut |\n",
+ "| O5 | PRJNA492158 | SRR7880373 | PRJNA492158 | SRR7880229 | human_gut |\n",
+ "| O11 | PRJNA492158 | SRR7880374 | PRJNA492158 | SRR7880208 | human_gut |\n",
+ "| O12 | PRJNA492158 | SRR7880375 | PRJNA492158 | SRR7880209 | human_gut |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 L2 PRJNA492158 SRR7880350 PRJNA492158 \n",
+ "2 L3 PRJNA492158 SRR7880351 PRJNA492158 \n",
+ "3 L5 PRJNA492158 SRR7880352 PRJNA492158 \n",
+ "4 L6 PRJNA492158 SRR7880353 PRJNA492158 \n",
+ "5 L7 PRJNA492158 SRR7880354 PRJNA492158 \n",
+ "6 L8 PRJNA492158 SRR7880355 PRJNA492158 \n",
+ "7 L9 PRJNA492158 SRR7880356 PRJNA492158 \n",
+ "8 L11 PRJNA492158 SRR7880357 PRJNA492158 \n",
+ "9 L12 PRJNA492158 SRR7880358 PRJNA492158 \n",
+ "10 O1 PRJNA492158 SRR7880359 PRJNA492158 \n",
+ "11 V6 PRJNA492158 SRR7880360 PRJNA492158 \n",
+ "12 V5 PRJNA492158 SRR7880361 PRJNA492158 \n",
+ "13 V8 PRJNA492158 SRR7880362 PRJNA492158 \n",
+ "14 V7 PRJNA492158 SRR7880363 PRJNA492158 \n",
+ "15 V11 PRJNA492158 SRR7880364 PRJNA492158 \n",
+ "16 V9 PRJNA492158 SRR7880365 PRJNA492158 \n",
+ "17 O6 PRJNA492158 SRR7880366 PRJNA492158 \n",
+ "18 O8 PRJNA492158 SRR7880367 PRJNA492158 \n",
+ "19 O9 PRJNA492158 SRR7880368 PRJNA492158 \n",
+ "20 O10 PRJNA492158 SRR7880369 PRJNA492158 \n",
+ "21 O2 PRJNA492158 SRR7880370 PRJNA492158 \n",
+ "22 O3 PRJNA492158 SRR7880371 PRJNA492158 \n",
+ "23 O4 PRJNA492158 SRR7880372 PRJNA492158 \n",
+ "24 O5 PRJNA492158 SRR7880373 PRJNA492158 \n",
+ "25 O11 PRJNA492158 SRR7880374 PRJNA492158 \n",
+ "26 O12 PRJNA492158 SRR7880375 PRJNA492158 \n",
+ " mgx_run_accession sample_type\n",
+ "1 SRR7880198 human_gut \n",
+ "2 SRR7880201 human_gut \n",
+ "3 SRR7880203 human_gut \n",
+ "4 SRR7880202 human_gut \n",
+ "5 SRR7880205 human_gut \n",
+ "6 SRR7880204 human_gut \n",
+ "7 SRR7880207 human_gut \n",
+ "8 SRR7880227 human_gut \n",
+ "9 SRR7880226 human_gut \n",
+ "10 SRR7880225 human_gut \n",
+ "11 SRR7880217 human_gut \n",
+ "12 SRR7880216 human_gut \n",
+ "13 SRR7880218 human_gut \n",
+ "14 SRR7880219 human_gut \n",
+ "15 SRR7880223 human_gut \n",
+ "16 SRR7880197 human_gut \n",
+ "17 SRR7880228 human_gut \n",
+ "18 SRR7880220 human_gut \n",
+ "19 SRR7880210 human_gut \n",
+ "20 SRR7880211 human_gut \n",
+ "21 SRR7880224 human_gut \n",
+ "22 SRR7880231 human_gut \n",
+ "23 SRR7880230 human_gut \n",
+ "24 SRR7880229 human_gut \n",
+ "25 SRR7880208 human_gut \n",
+ "26 SRR7880209 human_gut "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Split the PRJNA492158 run table by library source, then pair the runs on sample alias.\n",
+ "prjna492158_mtx <- prjna492158 %>%\n",
+ "  filter(library_source == \"METATRANSCRIPTOMIC\") %>%\n",
+ "  select(sample_name = sample_alias, mtx_study_accession = study_accession,\n",
+ "         mtx_run_accession = run_accession)\n",
+ "\n",
+ "prjna492158_mgx <- prjna492158 %>%\n",
+ "  filter(library_source == \"METAGENOMIC\") %>%\n",
+ "  select(sample_name = sample_alias, mgx_study_accession = study_accession,\n",
+ "         mgx_run_accession = run_accession)\n",
+ "\n",
+ "# inner_join keeps only samples that have both a metatranscriptome and a metagenome run.\n",
+ "prjna492158_mtx_vs_mgx <- inner_join(prjna492158_mtx, prjna492158_mgx, by = \"sample_name\") %>%\n",
+ "  mutate(sample_type = \"human_gut\")\n",
+ "\n",
+ "prjna492158_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c6297be8",
+ "metadata": {},
+ "source": [
+ "## PRJNA278075"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "e2bbc1d3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fetch the ENA read_run report for PRJNA278075; show_col_types = FALSE silences readr's column-spec message.\n",
+ "prjna278075 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA278075&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\", show_col_types = FALSE)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "c04f4678",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 1 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\t3m_Station6_GOM | PRJNA278075 | SRR1918203 | PRJNA278075 | SRR2001210 | ocean |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 1 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t 3m\\_Station6\\_GOM & PRJNA278075 & SRR1918203 & PRJNA278075 & SRR2001210 & ocean\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 1 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| 3m_Station6_GOM | PRJNA278075 | SRR1918203 | PRJNA278075 | SRR2001210 | ocean |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 3m_Station6_GOM PRJNA278075 SRR1918203 PRJNA278075 \n",
+ " mgx_run_accession sample_type\n",
+ "1 SRR2001210 ocean "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Remove the '_Metatranscriptome' / '_Metagenome' tag from each alias so both libraries share a sample_name.\n",
+ "prjna278075_mtx <- prjna278075 %>%\n",
+ "  filter(library_source == \"METATRANSCRIPTOMIC\") %>%\n",
+ "  mutate(sample_name = gsub(\"_Metatranscriptome\", \"\", sample_alias)) %>%\n",
+ "  select(sample_name,\n",
+ "         mtx_study_accession = study_accession, mtx_run_accession = run_accession)\n",
+ "\n",
+ "prjna278075_mgx <- prjna278075 %>%\n",
+ "  filter(library_source == \"METAGENOMIC\") %>%\n",
+ "  mutate(sample_name = gsub(\"_Metagenome\", \"\", sample_alias)) %>%\n",
+ "  select(sample_name,\n",
+ "         mgx_study_accession = study_accession, mgx_run_accession = run_accession)\n",
+ "\n",
+ "# inner_join keeps only samples that have both a metatranscriptome and a metagenome run.\n",
+ "prjna278075_mtx_vs_mgx <- inner_join(prjna278075_mtx, prjna278075_mgx, by = \"sample_name\") %>%\n",
+ "  mutate(sample_type = \"ocean\")\n",
+ "\n",
+ "prjna278075_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "47b51151",
+ "metadata": {},
+ "source": [
+ "## PRJEB38017"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "2ffa1577",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fetch the ENA read_run report for PRJEB38017; show_col_types = FALSE silences readr's column-spec message.\n",
+ "prjeb38017 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJEB38017&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\", show_col_types = FALSE)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "de8f831d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 3 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tCRF2_7 | PRJEB38017 | ERR4077213 | PRJEB38017 | ERR4073751 | cocoa_box_fermentation |
\n",
+ "\tCRF2_20 | PRJEB38017 | ERR4077214 | PRJEB38017 | ERR4073752 | cocoa_box_fermentation |
\n",
+ "\tCRF2_68 | PRJEB38017 | ERR4077215 | PRJEB38017 | ERR4073754 | cocoa_box_fermentation |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 3 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t CRF2\\_7 & PRJEB38017 & ERR4077213 & PRJEB38017 & ERR4073751 & cocoa\\_box\\_fermentation\\\\\n",
+ "\t CRF2\\_20 & PRJEB38017 & ERR4077214 & PRJEB38017 & ERR4073752 & cocoa\\_box\\_fermentation\\\\\n",
+ "\t CRF2\\_68 & PRJEB38017 & ERR4077215 & PRJEB38017 & ERR4073754 & cocoa\\_box\\_fermentation\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 3 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| CRF2_7 | PRJEB38017 | ERR4077213 | PRJEB38017 | ERR4073751 | cocoa_box_fermentation |\n",
+ "| CRF2_20 | PRJEB38017 | ERR4077214 | PRJEB38017 | ERR4073752 | cocoa_box_fermentation |\n",
+ "| CRF2_68 | PRJEB38017 | ERR4077215 | PRJEB38017 | ERR4073754 | cocoa_box_fermentation |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 CRF2_7 PRJEB38017 ERR4077213 PRJEB38017 \n",
+ "2 CRF2_20 PRJEB38017 ERR4077214 PRJEB38017 \n",
+ "3 CRF2_68 PRJEB38017 ERR4077215 PRJEB38017 \n",
+ " mgx_run_accession sample_type \n",
+ "1 ERR4073751 cocoa_box_fermentation\n",
+ "2 ERR4073752 cocoa_box_fermentation\n",
+ "3 ERR4073754 cocoa_box_fermentation"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# 'CRF' itself contains an 'R': drop it, turn every remaining 'R' into '_', then put 'CRF' back.\n",
+ "prjeb38017_mtx <- prjeb38017 %>%\n",
+ "  filter(library_source == \"METATRANSCRIPTOMIC\") %>%\n",
+ "  mutate(sample_name = gsub(\"CRF\", \"\", sample_alias),\n",
+ "         sample_name = gsub(\"R\", \"_\", sample_name),\n",
+ "         sample_name = paste0(\"CRF\", sample_name)) %>%\n",
+ "  select(sample_name,\n",
+ "         mtx_study_accession = study_accession, mtx_run_accession = run_accession)\n",
+ "\n",
+ "# Metagenome aliases: replace 'D' with '_' to build the same sample_name key.\n",
+ "prjeb38017_mgx <- prjeb38017 %>%\n",
+ "  filter(library_source == \"METAGENOMIC\") %>%\n",
+ "  mutate(sample_name = gsub(\"D\", \"_\", sample_alias)) %>%\n",
+ "  select(sample_name, mgx_study_accession = study_accession, mgx_run_accession = run_accession)\n",
+ "\n",
+ "# inner_join keeps only samples that have both a metatranscriptome and a metagenome run.\n",
+ "prjeb38017_mtx_vs_mgx <- inner_join(prjeb38017_mtx, prjeb38017_mgx, by = \"sample_name\") %>%\n",
+ "  mutate(sample_type = \"cocoa_box_fermentation\")\n",
+ "\n",
+ "prjeb38017_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f35357e1",
+ "metadata": {},
+ "source": [
+ "## PRJNA616041"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "3fe05289",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fetch the ENA read_run report for PRJNA616041; show_col_types = FALSE silences readr's column-spec message.\n",
+ "prjna616041 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA616041&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\", show_col_types = FALSE)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "id": "35e3b96e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 6 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tHP_AsLow | PRJNA616041 | SRR11450577 | PRJNA616041 | SRR11450583 | paddy_soil |
\n",
+ "\tSKS_AsHig | PRJNA616041 | SRR11450578 | PRJNA616041 | SRR11450584 | paddy_soil |
\n",
+ "\tCZ_AsHig | PRJNA616041 | SRR11450579 | PRJNA616041 | SRR11450587 | paddy_soil |
\n",
+ "\tCL_AsHig | PRJNA616041 | SRR11450580 | PRJNA616041 | SRR11450588 | paddy_soil |
\n",
+ "\tYCP_AsLow | PRJNA616041 | SRR11450585 | PRJNA616041 | SRR11450581 | paddy_soil |
\n",
+ "\tLH_AsLow | PRJNA616041 | SRR11450586 | PRJNA616041 | SRR11450582 | paddy_soil |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 6 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t HP\\_AsLow & PRJNA616041 & SRR11450577 & PRJNA616041 & SRR11450583 & paddy\\_soil\\\\\n",
+ "\t SKS\\_AsHig & PRJNA616041 & SRR11450578 & PRJNA616041 & SRR11450584 & paddy\\_soil\\\\\n",
+ "\t CZ\\_AsHig & PRJNA616041 & SRR11450579 & PRJNA616041 & SRR11450587 & paddy\\_soil\\\\\n",
+ "\t CL\\_AsHig & PRJNA616041 & SRR11450580 & PRJNA616041 & SRR11450588 & paddy\\_soil\\\\\n",
+ "\t YCP\\_AsLow & PRJNA616041 & SRR11450585 & PRJNA616041 & SRR11450581 & paddy\\_soil\\\\\n",
+ "\t LH\\_AsLow & PRJNA616041 & SRR11450586 & PRJNA616041 & SRR11450582 & paddy\\_soil\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 6 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| HP_AsLow | PRJNA616041 | SRR11450577 | PRJNA616041 | SRR11450583 | paddy_soil |\n",
+ "| SKS_AsHig | PRJNA616041 | SRR11450578 | PRJNA616041 | SRR11450584 | paddy_soil |\n",
+ "| CZ_AsHig | PRJNA616041 | SRR11450579 | PRJNA616041 | SRR11450587 | paddy_soil |\n",
+ "| CL_AsHig | PRJNA616041 | SRR11450580 | PRJNA616041 | SRR11450588 | paddy_soil |\n",
+ "| YCP_AsLow | PRJNA616041 | SRR11450585 | PRJNA616041 | SRR11450581 | paddy_soil |\n",
+ "| LH_AsLow | PRJNA616041 | SRR11450586 | PRJNA616041 | SRR11450582 | paddy_soil |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 HP_AsLow PRJNA616041 SRR11450577 PRJNA616041 \n",
+ "2 SKS_AsHig PRJNA616041 SRR11450578 PRJNA616041 \n",
+ "3 CZ_AsHig PRJNA616041 SRR11450579 PRJNA616041 \n",
+ "4 CL_AsHig PRJNA616041 SRR11450580 PRJNA616041 \n",
+ "5 YCP_AsLow PRJNA616041 SRR11450585 PRJNA616041 \n",
+ "6 LH_AsLow PRJNA616041 SRR11450586 PRJNA616041 \n",
+ " mgx_run_accession sample_type\n",
+ "1 SRR11450583 paddy_soil \n",
+ "2 SRR11450584 paddy_soil \n",
+ "3 SRR11450587 paddy_soil \n",
+ "4 SRR11450588 paddy_soil \n",
+ "5 SRR11450581 paddy_soil \n",
+ "6 SRR11450582 paddy_soil "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Split the PRJNA616041 run table by library source, then pair the runs on sample alias.\n",
+ "prjna616041_mtx <- prjna616041 %>%\n",
+ "  filter(library_source == \"METATRANSCRIPTOMIC\") %>%\n",
+ "  select(sample_name = sample_alias, mtx_study_accession = study_accession,\n",
+ "         mtx_run_accession = run_accession)\n",
+ "\n",
+ "prjna616041_mgx <- prjna616041 %>%\n",
+ "  filter(library_source == \"METAGENOMIC\") %>%\n",
+ "  select(sample_name = sample_alias, mgx_study_accession = study_accession,\n",
+ "         mgx_run_accession = run_accession)\n",
+ "\n",
+ "# inner_join keeps only samples that have both a metatranscriptome and a metagenome run.\n",
+ "prjna616041_mtx_vs_mgx <- inner_join(prjna616041_mtx, prjna616041_mgx, by = \"sample_name\") %>%\n",
+ "  mutate(sample_type = \"paddy_soil\")\n",
+ "\n",
+ "prjna616041_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d3351487",
+ "metadata": {},
+ "source": [
+ "## PRJNA393770 (mgx) & PRJNA395125 (mtx)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "id": "5b193bc9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ENA read_run report for the metagenome study (PRJNA393770); FALSE silences readr's column-spec message.\n",
+ "prjna393770 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA393770&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\", show_col_types = FALSE)\n",
+ "\n",
+ "# ENA read_run report for the metatranscriptome study (PRJNA395125).\n",
+ "prjna395125 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA395125&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\", show_col_types = FALSE)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "id": "b7dfdb12",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Metatranscriptome aliases carry an '_MT' tag; remove it so they match the metagenome aliases.\n",
+ "prjna395125_mtx <- prjna395125 %>%\n",
+ "  filter(library_source == \"METATRANSCRIPTOMIC\") %>%\n",
+ "  mutate(sample_name = gsub(\"_MT\", \"\", sample_alias)) %>%\n",
+ "  select(sample_name,\n",
+ "         mtx_study_accession = study_accession, mtx_run_accession = run_accession)\n",
+ "\n",
+ "prjna393770_mgx <- prjna393770 %>%\n",
+ "  filter(library_source == \"METAGENOMIC\") %>%\n",
+ "  select(sample_name = sample_alias,\n",
+ "         mgx_study_accession = study_accession, mgx_run_accession = run_accession)\n",
+ "\n",
+ "# Pair runs across the two studies; only samples present in both are kept.\n",
+ "prjna395125_vs_prjna393770 <- inner_join(prjna395125_mtx, prjna393770_mgx, by = \"sample_name\") %>%\n",
+ "  mutate(sample_type = \"bioreactor\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "718078bd",
+ "metadata": {},
+ "source": [
+ "## PRJEB12284 (mtx) & PRJEB12083 (mgx)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "id": "3c808614",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ENA read_run report for the metatranscriptome study (PRJEB12284); FALSE silences readr's column-spec message.\n",
+ "prjeb12284 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJEB12284&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\", show_col_types = FALSE)\n",
+ "\n",
+ "# ENA read_run report for the metagenome study (PRJEB12083).\n",
+ "prjeb12083 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJEB12083&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\", show_col_types = FALSE)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "id": "2dfe7df8",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 8 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tAH1 | PRJEB12284 | ERR1198915 | PRJEB12083 | ERR1191817 | wastewater |
\n",
+ "\tAH4 | PRJEB12284 | ERR1198916 | PRJEB12083 | ERR1191820 | wastewater |
\n",
+ "\tAH5 | PRJEB12284 | ERR1198917 | PRJEB12083 | ERR1191821 | wastewater |
\n",
+ "\tAH6 | PRJEB12284 | ERR1198918 | PRJEB12083 | ERR1191822 | wastewater |
\n",
+ "\tDF1 | PRJEB12284 | ERR1198919 | PRJEB12083 | ERR1193331 | wastewater |
\n",
+ "\tDF4 | PRJEB12284 | ERR1198920 | PRJEB12083 | ERR1193299 | wastewater |
\n",
+ "\tDF5 | PRJEB12284 | ERR1198921 | PRJEB12083 | ERR1193300 | wastewater |
\n",
+ "\tDF6 | PRJEB12284 | ERR1198922 | PRJEB12083 | ERR1193301 | wastewater |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 8 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t AH1 & PRJEB12284 & ERR1198915 & PRJEB12083 & ERR1191817 & wastewater\\\\\n",
+ "\t AH4 & PRJEB12284 & ERR1198916 & PRJEB12083 & ERR1191820 & wastewater\\\\\n",
+ "\t AH5 & PRJEB12284 & ERR1198917 & PRJEB12083 & ERR1191821 & wastewater\\\\\n",
+ "\t AH6 & PRJEB12284 & ERR1198918 & PRJEB12083 & ERR1191822 & wastewater\\\\\n",
+ "\t DF1 & PRJEB12284 & ERR1198919 & PRJEB12083 & ERR1193331 & wastewater\\\\\n",
+ "\t DF4 & PRJEB12284 & ERR1198920 & PRJEB12083 & ERR1193299 & wastewater\\\\\n",
+ "\t DF5 & PRJEB12284 & ERR1198921 & PRJEB12083 & ERR1193300 & wastewater\\\\\n",
+ "\t DF6 & PRJEB12284 & ERR1198922 & PRJEB12083 & ERR1193301 & wastewater\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 8 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| AH1 | PRJEB12284 | ERR1198915 | PRJEB12083 | ERR1191817 | wastewater |\n",
+ "| AH4 | PRJEB12284 | ERR1198916 | PRJEB12083 | ERR1191820 | wastewater |\n",
+ "| AH5 | PRJEB12284 | ERR1198917 | PRJEB12083 | ERR1191821 | wastewater |\n",
+ "| AH6 | PRJEB12284 | ERR1198918 | PRJEB12083 | ERR1191822 | wastewater |\n",
+ "| DF1 | PRJEB12284 | ERR1198919 | PRJEB12083 | ERR1193331 | wastewater |\n",
+ "| DF4 | PRJEB12284 | ERR1198920 | PRJEB12083 | ERR1193299 | wastewater |\n",
+ "| DF5 | PRJEB12284 | ERR1198921 | PRJEB12083 | ERR1193300 | wastewater |\n",
+ "| DF6 | PRJEB12284 | ERR1198922 | PRJEB12083 | ERR1193301 | wastewater |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 AH1 PRJEB12284 ERR1198915 PRJEB12083 \n",
+ "2 AH4 PRJEB12284 ERR1198916 PRJEB12083 \n",
+ "3 AH5 PRJEB12284 ERR1198917 PRJEB12083 \n",
+ "4 AH6 PRJEB12284 ERR1198918 PRJEB12083 \n",
+ "5 DF1 PRJEB12284 ERR1198919 PRJEB12083 \n",
+ "6 DF4 PRJEB12284 ERR1198920 PRJEB12083 \n",
+ "7 DF5 PRJEB12284 ERR1198921 PRJEB12083 \n",
+ "8 DF6 PRJEB12284 ERR1198922 PRJEB12083 \n",
+ " mgx_run_accession sample_type\n",
+ "1 ERR1191817 wastewater \n",
+ "2 ERR1191820 wastewater \n",
+ "3 ERR1191821 wastewater \n",
+ "4 ERR1191822 wastewater \n",
+ "5 ERR1193331 wastewater \n",
+ "6 ERR1193299 wastewater \n",
+ "7 ERR1193300 wastewater \n",
+ "8 ERR1193301 wastewater "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Metatranscriptome aliases contain dots; delete them (literal match) to get the join key.\n",
+ "prjeb12284_mtx <- prjeb12284 %>%\n",
+ "  mutate(sample_name = gsub(\".\", \"\", sample_alias, fixed = TRUE)) %>%\n",
+ "  select(sample_name, mtx_study_accession = study_accession, mtx_run_accession = run_accession)\n",
+ "\n",
+ "# Metagenome aliases are used as the sample_name unchanged.\n",
+ "prjeb12083_mgx <- prjeb12083 %>%\n",
+ "  select(sample_name = sample_alias,\n",
+ "         mgx_study_accession = study_accession, mgx_run_accession = run_accession)\n",
+ "\n",
+ "# Pair runs across the two studies; only samples present in both are kept.\n",
+ "prjeb12284_vs_prjeb12083 <- inner_join(prjeb12284_mtx, prjeb12083_mgx, by = \"sample_name\") %>%\n",
+ "  mutate(sample_type = \"wastewater\")\n",
+ "\n",
+ "prjeb12284_vs_prjeb12083"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "256aec2e",
+ "metadata": {},
+ "source": [
+ "## PRJEB32787 (mgx) & PRJEB32788 (mtx)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "id": "6041c9ec",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ENA read_run report for the metagenome study (PRJEB32787); FALSE silences readr's column-spec message.\n",
+ "prjeb32787 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJEB32787&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\", show_col_types = FALSE)\n",
+ "\n",
+ "# ENA read_run report for the metatranscriptome study (PRJEB32788).\n",
+ "prjeb32788 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJEB32788&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\", show_col_types = FALSE)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "id": "c46f0f14",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
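+       "<table class=\"dataframe\">\n",
+       "<caption>A tibble: 17 × 6</caption>\n",
+       "<thead>\n",
+       "\t<tr><th scope=col>sample_name</th><th scope=col>mtx_study_accession</th><th scope=col>mtx_run_accession</th><th scope=col>mgx_study_accession</th><th scope=col>mgx_run_accession</th><th scope=col>sample_type</th></tr>\n",
+       "\t<tr><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th></tr>\n",
+       "</thead>\n",
+       "<tbody>\n",
+       "\t<tr><td>BazoSym_LS-ET_5</td><td>PRJEB32788</td><td>ERR3342479</td><td>PRJEB32787</td><td>ERR3342499</td><td>mussel_gill</td></tr>\n",
+       "\t<tr><td>BazoSym_LS-ET_4</td><td>PRJEB32788</td><td>ERR3342480</td><td>PRJEB32787</td><td>ERR3342500</td><td>mussel_gill</td></tr>\n",
+       "\t<tr><td>BazoSym_LS-MS_3</td><td>PRJEB32788</td><td>ERR3342481</td><td>PRJEB32787</td><td>ERR3342501</td><td>mussel_gill</td></tr>\n",
+       "\t<tr><td>BazoSym_LS-MS_2</td><td>PRJEB32788</td><td>ERR3342482</td><td>PRJEB32787</td><td>ERR3342502</td><td>mussel_gill</td></tr>\n",
+       "\t<tr><td>BazoSym_LS-MS_1</td><td>PRJEB32788</td><td>ERR3342483</td><td>PRJEB32787</td><td>ERR3342503</td><td>mussel_gill</td></tr>\n",
+       "\t<tr><td>BspSym_Li_5</td><td>PRJEB32788</td><td>ERR3342467</td><td>PRJEB32787</td><td>ERR3342486</td><td>mussel_gill</td></tr>\n",
+       "\t<tr><td>BspSym_Li_4</td><td>PRJEB32788</td><td>ERR3342468</td><td>PRJEB32787</td><td>ERR3342487</td><td>mussel_gill</td></tr>\n",
+       "\t<tr><td>BspSym_Li_3</td><td>PRJEB32788</td><td>ERR3342469</td><td>PRJEB32787</td><td>ERR3342488</td><td>mussel_gill</td></tr>\n",
+       "\t<tr><td>BspSym_Li_2</td><td>PRJEB32788</td><td>ERR3342470</td><td>PRJEB32787</td><td>ERR3342489</td><td>mussel_gill</td></tr>\n",
+       "\t<tr><td>BspSym_Li_1</td><td>PRJEB32788</td><td>ERR3342471</td><td>PRJEB32787</td><td>ERR3342490</td><td>mussel_gill</td></tr>\n",
+       "\t<tr><td>BspSym_Cl_4</td><td>PRJEB32788</td><td>ERR3342472</td><td>PRJEB32787</td><td>ERR3342492</td><td>mussel_gill</td></tr>\n",
+       "\t<tr><td>BspSym_Cl_3</td><td>PRJEB32788</td><td>ERR3342473</td><td>PRJEB32787</td><td>ERR3342493</td><td>mussel_gill</td></tr>\n",
+       "\t<tr><td>BspSym_Cl_2</td><td>PRJEB32788</td><td>ERR3342474</td><td>PRJEB32787</td><td>ERR3342494</td><td>mussel_gill</td></tr>\n",
+       "\t<tr><td>BspSym_Cl_1</td><td>PRJEB32788</td><td>ERR3342475</td><td>PRJEB32787</td><td>ERR3342495</td><td>mussel_gill</td></tr>\n",
+       "\t<tr><td>BputSym_Se-AL_3</td><td>PRJEB32788</td><td>ERR3342476</td><td>PRJEB32787</td><td>ERR3342496</td><td>mussel_gill</td></tr>\n",
+       "\t<tr><td>BputSym_Se-AL_2</td><td>PRJEB32788</td><td>ERR3342477</td><td>PRJEB32787</td><td>ERR3342497</td><td>mussel_gill</td></tr>\n",
+       "\t<tr><td>BputSym_Se-AL_1</td><td>PRJEB32788</td><td>ERR3342478</td><td>PRJEB32787</td><td>ERR3342498</td><td>mussel_gill</td></tr>\n",
+       "</tbody>\n",
+       "</table>\n"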
+ ],
+ "text/latex": [
+ "A tibble: 17 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t BazoSym\\_LS-ET\\_5 & PRJEB32788 & ERR3342479 & PRJEB32787 & ERR3342499 & mussel\\_gill\\\\\n",
+ "\t BazoSym\\_LS-ET\\_4 & PRJEB32788 & ERR3342480 & PRJEB32787 & ERR3342500 & mussel\\_gill\\\\\n",
+ "\t BazoSym\\_LS-MS\\_3 & PRJEB32788 & ERR3342481 & PRJEB32787 & ERR3342501 & mussel\\_gill\\\\\n",
+ "\t BazoSym\\_LS-MS\\_2 & PRJEB32788 & ERR3342482 & PRJEB32787 & ERR3342502 & mussel\\_gill\\\\\n",
+ "\t BazoSym\\_LS-MS\\_1 & PRJEB32788 & ERR3342483 & PRJEB32787 & ERR3342503 & mussel\\_gill\\\\\n",
+ "\t BspSym\\_Li\\_5 & PRJEB32788 & ERR3342467 & PRJEB32787 & ERR3342486 & mussel\\_gill\\\\\n",
+ "\t BspSym\\_Li\\_4 & PRJEB32788 & ERR3342468 & PRJEB32787 & ERR3342487 & mussel\\_gill\\\\\n",
+ "\t BspSym\\_Li\\_3 & PRJEB32788 & ERR3342469 & PRJEB32787 & ERR3342488 & mussel\\_gill\\\\\n",
+ "\t BspSym\\_Li\\_2 & PRJEB32788 & ERR3342470 & PRJEB32787 & ERR3342489 & mussel\\_gill\\\\\n",
+ "\t BspSym\\_Li\\_1 & PRJEB32788 & ERR3342471 & PRJEB32787 & ERR3342490 & mussel\\_gill\\\\\n",
+ "\t BspSym\\_Cl\\_4 & PRJEB32788 & ERR3342472 & PRJEB32787 & ERR3342492 & mussel\\_gill\\\\\n",
+ "\t BspSym\\_Cl\\_3 & PRJEB32788 & ERR3342473 & PRJEB32787 & ERR3342493 & mussel\\_gill\\\\\n",
+ "\t BspSym\\_Cl\\_2 & PRJEB32788 & ERR3342474 & PRJEB32787 & ERR3342494 & mussel\\_gill\\\\\n",
+ "\t BspSym\\_Cl\\_1 & PRJEB32788 & ERR3342475 & PRJEB32787 & ERR3342495 & mussel\\_gill\\\\\n",
+ "\t BputSym\\_Se-AL\\_3 & PRJEB32788 & ERR3342476 & PRJEB32787 & ERR3342496 & mussel\\_gill\\\\\n",
+ "\t BputSym\\_Se-AL\\_2 & PRJEB32788 & ERR3342477 & PRJEB32787 & ERR3342497 & mussel\\_gill\\\\\n",
+ "\t BputSym\\_Se-AL\\_1 & PRJEB32788 & ERR3342478 & PRJEB32787 & ERR3342498 & mussel\\_gill\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 17 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| BazoSym_LS-ET_5 | PRJEB32788 | ERR3342479 | PRJEB32787 | ERR3342499 | mussel_gill |\n",
+ "| BazoSym_LS-ET_4 | PRJEB32788 | ERR3342480 | PRJEB32787 | ERR3342500 | mussel_gill |\n",
+ "| BazoSym_LS-MS_3 | PRJEB32788 | ERR3342481 | PRJEB32787 | ERR3342501 | mussel_gill |\n",
+ "| BazoSym_LS-MS_2 | PRJEB32788 | ERR3342482 | PRJEB32787 | ERR3342502 | mussel_gill |\n",
+ "| BazoSym_LS-MS_1 | PRJEB32788 | ERR3342483 | PRJEB32787 | ERR3342503 | mussel_gill |\n",
+ "| BspSym_Li_5 | PRJEB32788 | ERR3342467 | PRJEB32787 | ERR3342486 | mussel_gill |\n",
+ "| BspSym_Li_4 | PRJEB32788 | ERR3342468 | PRJEB32787 | ERR3342487 | mussel_gill |\n",
+ "| BspSym_Li_3 | PRJEB32788 | ERR3342469 | PRJEB32787 | ERR3342488 | mussel_gill |\n",
+ "| BspSym_Li_2 | PRJEB32788 | ERR3342470 | PRJEB32787 | ERR3342489 | mussel_gill |\n",
+ "| BspSym_Li_1 | PRJEB32788 | ERR3342471 | PRJEB32787 | ERR3342490 | mussel_gill |\n",
+ "| BspSym_Cl_4 | PRJEB32788 | ERR3342472 | PRJEB32787 | ERR3342492 | mussel_gill |\n",
+ "| BspSym_Cl_3 | PRJEB32788 | ERR3342473 | PRJEB32787 | ERR3342493 | mussel_gill |\n",
+ "| BspSym_Cl_2 | PRJEB32788 | ERR3342474 | PRJEB32787 | ERR3342494 | mussel_gill |\n",
+ "| BspSym_Cl_1 | PRJEB32788 | ERR3342475 | PRJEB32787 | ERR3342495 | mussel_gill |\n",
+ "| BputSym_Se-AL_3 | PRJEB32788 | ERR3342476 | PRJEB32787 | ERR3342496 | mussel_gill |\n",
+ "| BputSym_Se-AL_2 | PRJEB32788 | ERR3342477 | PRJEB32787 | ERR3342497 | mussel_gill |\n",
+ "| BputSym_Se-AL_1 | PRJEB32788 | ERR3342478 | PRJEB32787 | ERR3342498 | mussel_gill |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 BazoSym_LS-ET_5 PRJEB32788 ERR3342479 PRJEB32787 \n",
+ "2 BazoSym_LS-ET_4 PRJEB32788 ERR3342480 PRJEB32787 \n",
+ "3 BazoSym_LS-MS_3 PRJEB32788 ERR3342481 PRJEB32787 \n",
+ "4 BazoSym_LS-MS_2 PRJEB32788 ERR3342482 PRJEB32787 \n",
+ "5 BazoSym_LS-MS_1 PRJEB32788 ERR3342483 PRJEB32787 \n",
+ "6 BspSym_Li_5 PRJEB32788 ERR3342467 PRJEB32787 \n",
+ "7 BspSym_Li_4 PRJEB32788 ERR3342468 PRJEB32787 \n",
+ "8 BspSym_Li_3 PRJEB32788 ERR3342469 PRJEB32787 \n",
+ "9 BspSym_Li_2 PRJEB32788 ERR3342470 PRJEB32787 \n",
+ "10 BspSym_Li_1 PRJEB32788 ERR3342471 PRJEB32787 \n",
+ "11 BspSym_Cl_4 PRJEB32788 ERR3342472 PRJEB32787 \n",
+ "12 BspSym_Cl_3 PRJEB32788 ERR3342473 PRJEB32787 \n",
+ "13 BspSym_Cl_2 PRJEB32788 ERR3342474 PRJEB32787 \n",
+ "14 BspSym_Cl_1 PRJEB32788 ERR3342475 PRJEB32787 \n",
+ "15 BputSym_Se-AL_3 PRJEB32788 ERR3342476 PRJEB32787 \n",
+ "16 BputSym_Se-AL_2 PRJEB32788 ERR3342477 PRJEB32787 \n",
+ "17 BputSym_Se-AL_1 PRJEB32788 ERR3342478 PRJEB32787 \n",
+ " mgx_run_accession sample_type\n",
+ "1 ERR3342499 mussel_gill\n",
+ "2 ERR3342500 mussel_gill\n",
+ "3 ERR3342501 mussel_gill\n",
+ "4 ERR3342502 mussel_gill\n",
+ "5 ERR3342503 mussel_gill\n",
+ "6 ERR3342486 mussel_gill\n",
+ "7 ERR3342487 mussel_gill\n",
+ "8 ERR3342488 mussel_gill\n",
+ "9 ERR3342489 mussel_gill\n",
+ "10 ERR3342490 mussel_gill\n",
+ "11 ERR3342492 mussel_gill\n",
+ "12 ERR3342493 mussel_gill\n",
+ "13 ERR3342494 mussel_gill\n",
+ "14 ERR3342495 mussel_gill\n",
+ "15 ERR3342496 mussel_gill\n",
+ "16 ERR3342497 mussel_gill\n",
+ "17 ERR3342498 mussel_gill"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+    "prjeb32787_mgx <- prjeb32787 %>%\n",
+    "  mutate(sample_name = gsub(\"_[^_]*$\", \"\", sample_title)) %>%  # drop the last _-delimited field of the title\n",
+    "  select(sample_name,\n",
+    "         mgx_study_accession = study_accession,\n",
+    "         mgx_run_accession = run_accession)\n",
+    "\n",
+    "prjeb32788_mtx <- prjeb32788 %>%\n",
+    "  mutate(sample_name = gsub(\"_[^_]*$\", \"\", sample_title)) %>%  # same trimming so both tables share a key\n",
+    "  select(sample_name,\n",
+    "         mtx_study_accession = study_accession,\n",
+    "         mtx_run_accession = run_accession)\n",
+    "\n",
+    "# pass 1: metatranscriptome names that match a metagenome name as-is; inner_join keeps only matched rows\n",
+    "prjeb32788_vs_prjeb32781_1 <- inner_join(prjeb32788_mtx, prjeb32787_mgx, by = \"sample_name\")\n",
+    "\n",
+    "# pass 2: trim one more trailing _-delimited field from the mtx names and match again\n",
+    "prjeb32788_mtx_2 <- prjeb32788_mtx %>%\n",
+    "  mutate(sample_name = gsub(\"_[^_]*$\", \"\", sample_name))\n",
+    "\n",
+    "prjeb32788_vs_prjeb32781_2 <- inner_join(prjeb32788_mtx_2, prjeb32787_mgx, by = \"sample_name\")\n",
+    "\n",
+    "prjeb32788_vs_prjeb32781 <- bind_rows(prjeb32788_vs_prjeb32781_1, prjeb32788_vs_prjeb32781_2) %>%\n",
+    "  mutate(sample_type = \"mussel_gill\")\n",
+    "\n",
+    "prjeb32788_vs_prjeb32781"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cd95fc8c",
+ "metadata": {},
+ "source": [
+ "## iHMP (PRJNA398089)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "id": "1f0ccf12",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "prjna398089 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA398089&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+    "                        show_col_types = FALSE)  # FALSE (not the reassignable alias F) silences the column-spec message"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "id": "dd826c94",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
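+       "<table class=\"dataframe\">\n",
+       "<caption>A tibble: 761 × 6</caption>\n",
+       "<thead>\n",
+       "\t<tr><th scope=col>sample_name</th><th scope=col>mtx_study_accession</th><th scope=col>mtx_run_accession</th><th scope=col>mgx_study_accession</th><th scope=col>mgx_run_accession</th><th scope=col>sample_type</th></tr>\n",
+       "\t<tr><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th></tr>\n",
+       "</thead>\n",
+       "<tbody>\n",
+       "\t<tr><td>CSM67UC6</td><td>PRJNA398089</td><td>SRR5949109</td><td>PRJNA398089</td><td>SRR5936217</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>CSM67UDY</td><td>PRJNA398089</td><td>SRR5949110</td><td>PRJNA398089</td><td>SRR5936216</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>HSM67VI9</td><td>PRJNA398089</td><td>SRR5949111</td><td>PRJNA398089</td><td>SRR5936212</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>HSM6XRTQ</td><td>PRJNA398089</td><td>SRR5949112</td><td>PRJNA398089</td><td>SRR5936211</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>CSM79HHM</td><td>PRJNA398089</td><td>SRR5949113</td><td>PRJNA398089</td><td>SRR5946811</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>CSM79HIR</td><td>PRJNA398089</td><td>SRR5949114</td><td>PRJNA398089</td><td>SRR5936215</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>CSM79HJO</td><td>PRJNA398089</td><td>SRR5949115</td><td>PRJNA398089</td><td>SRR5947089</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>HSM67VEI</td><td>PRJNA398089</td><td>SRR5949116</td><td>PRJNA398089</td><td>SRR5936210</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM79H9K</td><td>PRJNA398089</td><td>SRR5949117</td><td>PRJNA398089</td><td>SRR5935975</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM79HAH</td><td>PRJNA398089</td><td>SRR5949118</td><td>PRJNA398089</td><td>SRR5935976</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>HSM7J4L5</td><td>PRJNA398089</td><td>SRR5949119</td><td>PRJNA398089</td><td>SRR5935953</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM79H7E</td><td>PRJNA398089</td><td>SRR5949120</td><td>PRJNA398089</td><td>SRR5935950</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>PSM7J17Z</td><td>PRJNA398089</td><td>SRR5949121</td><td>PRJNA398089</td><td>SRR5935951</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>PSM7J18G</td><td>PRJNA398089</td><td>SRR5949122</td><td>PRJNA398089</td><td>SRR5935952</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM79HDI</td><td>PRJNA398089</td><td>SRR5949123</td><td>PRJNA398089</td><td>SRR5935954</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>HSM7J4JH</td><td>PRJNA398089</td><td>SRR5949124</td><td>PRJNA398089</td><td>SRR5935955</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>HSM7J4HU</td><td>PRJNA398089</td><td>SRR5949125</td><td>PRJNA398089</td><td>SRR5935956</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>PSM7J1CI</td><td>PRJNA398089</td><td>SRR5949126</td><td>PRJNA398089</td><td>SRR5935957</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>HSM7CZ36</td><td>PRJNA398089</td><td>SRR5949127</td><td>PRJNA398089</td><td>SRR5936016</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>CSM7KOJW</td><td>PRJNA398089</td><td>SRR5949128</td><td>PRJNA398089</td><td>SRR5947102</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM79HDM</td><td>PRJNA398089</td><td>SRR5949129</td><td>PRJNA398089</td><td>SRR5936018</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>PSM6XBTP</td><td>PRJNA398089</td><td>SRR5949130</td><td>PRJNA398089</td><td>SRR5936066</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM79HF9</td><td>PRJNA398089</td><td>SRR5949131</td><td>PRJNA398089</td><td>SRR5936064</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>CSM79HQF</td><td>PRJNA398089</td><td>SRR5949132</td><td>PRJNA398089</td><td>SRR5936119</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM79H9A</td><td>PRJNA398089</td><td>SRR5949133</td><td>PRJNA398089</td><td>SRR5936118</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>HSM7CZ3E</td><td>PRJNA398089</td><td>SRR5949134</td><td>PRJNA398089</td><td>SRR5936121</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>CSM79HP4</td><td>PRJNA398089</td><td>SRR5949135</td><td>PRJNA398089</td><td>SRR5946645</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>HSM7CYY7</td><td>PRJNA398089</td><td>SRR5949136</td><td>PRJNA398089</td><td>SRR5936117</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>CSM79HLC</td><td>PRJNA398089</td><td>SRR5949137</td><td>PRJNA398089</td><td>SRR5936176</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM79H5K</td><td>PRJNA398089</td><td>SRR5949138</td><td>PRJNA398089</td><td>SRR5936132</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>⋮</td><td>⋮</td><td>⋮</td><td>⋮</td><td>⋮</td><td>⋮</td></tr>\n",
+       "\t<tr><td>PSM6XBSM</td><td>PRJNA398089</td><td>SRR5963918</td><td>PRJNA398089</td><td>SRR5950713</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM79HDQ_TR</td><td>PRJNA398089</td><td>SRR5963920</td><td>PRJNA398089</td><td>SRR5962901</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM79HDG_TR</td><td>PRJNA398089</td><td>SRR5963921</td><td>PRJNA398089</td><td>SRR5962900</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM9VZEK_TR</td><td>PRJNA398089</td><td>SRR5963922</td><td>PRJNA398089</td><td>SRR5962903</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM9VZMA_TR</td><td>PRJNA398089</td><td>SRR5963923</td><td>PRJNA398089</td><td>SRR5962902</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>PSMA265J_TR</td><td>PRJNA398089</td><td>SRR5963924</td><td>PRJNA398089</td><td>SRR5962904</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSMA26AZ_TR</td><td>PRJNA398089</td><td>SRR5963925</td><td>PRJNA398089</td><td>SRR5962908</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>HSMA33LH_TR</td><td>PRJNA398089</td><td>SRR5963926</td><td>PRJNA398089</td><td>SRR5962907</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM9VZNH_TR</td><td>PRJNA398089</td><td>SRR5963927</td><td>PRJNA398089</td><td>SRR5962906</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>HSMA33RX_TR</td><td>PRJNA398089</td><td>SRR5963928</td><td>PRJNA398089</td><td>SRR5962885</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>PSM6XBQY_TR</td><td>PRJNA398089</td><td>SRR5963929</td><td>PRJNA398089</td><td>SRR5962895</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>CSM79HLA_TR</td><td>PRJNA398089</td><td>SRR5963930</td><td>PRJNA398089</td><td>SRR5962894</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>HSM6XRVC_TR</td><td>PRJNA398089</td><td>SRR5963931</td><td>PRJNA398089</td><td>SRR5962893</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>HSM5MD6A_TR</td><td>PRJNA398089</td><td>SRR5963932</td><td>PRJNA398089</td><td>SRR5962891</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>PSM6XBRK_TR</td><td>PRJNA398089</td><td>SRR5963933</td><td>PRJNA398089</td><td>SRR5962890</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>CSM67UEW_TR</td><td>PRJNA398089</td><td>SRR5963934</td><td>PRJNA398089</td><td>SRR5962889</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>HSM67VEM_TR</td><td>PRJNA398089</td><td>SRR5963935</td><td>PRJNA398089</td><td>SRR5962892</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>CSM67UDR_TR</td><td>PRJNA398089</td><td>SRR5963936</td><td>PRJNA398089</td><td>SRR5962888</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>PSM6XBSU_TR</td><td>PRJNA398089</td><td>SRR5963937</td><td>PRJNA398089</td><td>SRR5962897</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM79HF9_TR</td><td>PRJNA398089</td><td>SRR5963938</td><td>PRJNA398089</td><td>SRR5962898</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>PSM6XBRK</td><td>PRJNA398089</td><td>SRR5963939</td><td>PRJNA398089</td><td>SRR5935960</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>PSM6XBQY</td><td>PRJNA398089</td><td>SRR5963940</td><td>PRJNA398089</td><td>SRR5935809</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSMA26EP</td><td>PRJNA398089</td><td>SRR5963941</td><td>PRJNA398089</td><td>SRR5946720</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM9VZOW</td><td>PRJNA398089</td><td>SRR5963942</td><td>PRJNA398089</td><td>SRR5946733</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>PSM6XBQU</td><td>PRJNA398089</td><td>SRR5963943</td><td>PRJNA398089</td><td>SRR5936007</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSMAPC5Z</td><td>PRJNA398089</td><td>SRR5963944</td><td>PRJNA398089</td><td>SRR5946832</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM6J2M3</td><td>PRJNA398089</td><td>SRR5963945</td><td>PRJNA398089</td><td>SRR5935963</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM6J2K6</td><td>PRJNA398089</td><td>SRR5963946</td><td>PRJNA398089</td><td>SRR5935938</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM6J2Q1</td><td>PRJNA398089</td><td>SRR5963947</td><td>PRJNA398089</td><td>SRR5936013</td><td>human_gut</td></tr>\n",
+       "\t<tr><td>MSM6J2PS</td><td>PRJNA398089</td><td>SRR5963948</td><td>PRJNA398089</td><td>SRR5936231</td><td>human_gut</td></tr>\n",
+       "</tbody>\n",
+       "</table>\n"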
+ ],
+ "text/latex": [
+ "A tibble: 761 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t CSM67UC6 & PRJNA398089 & SRR5949109 & PRJNA398089 & SRR5936217 & human\\_gut\\\\\n",
+ "\t CSM67UDY & PRJNA398089 & SRR5949110 & PRJNA398089 & SRR5936216 & human\\_gut\\\\\n",
+ "\t HSM67VI9 & PRJNA398089 & SRR5949111 & PRJNA398089 & SRR5936212 & human\\_gut\\\\\n",
+ "\t HSM6XRTQ & PRJNA398089 & SRR5949112 & PRJNA398089 & SRR5936211 & human\\_gut\\\\\n",
+ "\t CSM79HHM & PRJNA398089 & SRR5949113 & PRJNA398089 & SRR5946811 & human\\_gut\\\\\n",
+ "\t CSM79HIR & PRJNA398089 & SRR5949114 & PRJNA398089 & SRR5936215 & human\\_gut\\\\\n",
+ "\t CSM79HJO & PRJNA398089 & SRR5949115 & PRJNA398089 & SRR5947089 & human\\_gut\\\\\n",
+ "\t HSM67VEI & PRJNA398089 & SRR5949116 & PRJNA398089 & SRR5936210 & human\\_gut\\\\\n",
+ "\t MSM79H9K & PRJNA398089 & SRR5949117 & PRJNA398089 & SRR5935975 & human\\_gut\\\\\n",
+ "\t MSM79HAH & PRJNA398089 & SRR5949118 & PRJNA398089 & SRR5935976 & human\\_gut\\\\\n",
+ "\t HSM7J4L5 & PRJNA398089 & SRR5949119 & PRJNA398089 & SRR5935953 & human\\_gut\\\\\n",
+ "\t MSM79H7E & PRJNA398089 & SRR5949120 & PRJNA398089 & SRR5935950 & human\\_gut\\\\\n",
+ "\t PSM7J17Z & PRJNA398089 & SRR5949121 & PRJNA398089 & SRR5935951 & human\\_gut\\\\\n",
+ "\t PSM7J18G & PRJNA398089 & SRR5949122 & PRJNA398089 & SRR5935952 & human\\_gut\\\\\n",
+ "\t MSM79HDI & PRJNA398089 & SRR5949123 & PRJNA398089 & SRR5935954 & human\\_gut\\\\\n",
+ "\t HSM7J4JH & PRJNA398089 & SRR5949124 & PRJNA398089 & SRR5935955 & human\\_gut\\\\\n",
+ "\t HSM7J4HU & PRJNA398089 & SRR5949125 & PRJNA398089 & SRR5935956 & human\\_gut\\\\\n",
+ "\t PSM7J1CI & PRJNA398089 & SRR5949126 & PRJNA398089 & SRR5935957 & human\\_gut\\\\\n",
+ "\t HSM7CZ36 & PRJNA398089 & SRR5949127 & PRJNA398089 & SRR5936016 & human\\_gut\\\\\n",
+ "\t CSM7KOJW & PRJNA398089 & SRR5949128 & PRJNA398089 & SRR5947102 & human\\_gut\\\\\n",
+ "\t MSM79HDM & PRJNA398089 & SRR5949129 & PRJNA398089 & SRR5936018 & human\\_gut\\\\\n",
+ "\t PSM6XBTP & PRJNA398089 & SRR5949130 & PRJNA398089 & SRR5936066 & human\\_gut\\\\\n",
+ "\t MSM79HF9 & PRJNA398089 & SRR5949131 & PRJNA398089 & SRR5936064 & human\\_gut\\\\\n",
+ "\t CSM79HQF & PRJNA398089 & SRR5949132 & PRJNA398089 & SRR5936119 & human\\_gut\\\\\n",
+ "\t MSM79H9A & PRJNA398089 & SRR5949133 & PRJNA398089 & SRR5936118 & human\\_gut\\\\\n",
+ "\t HSM7CZ3E & PRJNA398089 & SRR5949134 & PRJNA398089 & SRR5936121 & human\\_gut\\\\\n",
+ "\t CSM79HP4 & PRJNA398089 & SRR5949135 & PRJNA398089 & SRR5946645 & human\\_gut\\\\\n",
+ "\t HSM7CYY7 & PRJNA398089 & SRR5949136 & PRJNA398089 & SRR5936117 & human\\_gut\\\\\n",
+ "\t CSM79HLC & PRJNA398089 & SRR5949137 & PRJNA398089 & SRR5936176 & human\\_gut\\\\\n",
+ "\t MSM79H5K & PRJNA398089 & SRR5949138 & PRJNA398089 & SRR5936132 & human\\_gut\\\\\n",
+ "\t ⋮ & ⋮ & ⋮ & ⋮ & ⋮ & ⋮\\\\\n",
+ "\t PSM6XBSM & PRJNA398089 & SRR5963918 & PRJNA398089 & SRR5950713 & human\\_gut\\\\\n",
+ "\t MSM79HDQ\\_TR & PRJNA398089 & SRR5963920 & PRJNA398089 & SRR5962901 & human\\_gut\\\\\n",
+ "\t MSM79HDG\\_TR & PRJNA398089 & SRR5963921 & PRJNA398089 & SRR5962900 & human\\_gut\\\\\n",
+ "\t MSM9VZEK\\_TR & PRJNA398089 & SRR5963922 & PRJNA398089 & SRR5962903 & human\\_gut\\\\\n",
+ "\t MSM9VZMA\\_TR & PRJNA398089 & SRR5963923 & PRJNA398089 & SRR5962902 & human\\_gut\\\\\n",
+ "\t PSMA265J\\_TR & PRJNA398089 & SRR5963924 & PRJNA398089 & SRR5962904 & human\\_gut\\\\\n",
+ "\t MSMA26AZ\\_TR & PRJNA398089 & SRR5963925 & PRJNA398089 & SRR5962908 & human\\_gut\\\\\n",
+ "\t HSMA33LH\\_TR & PRJNA398089 & SRR5963926 & PRJNA398089 & SRR5962907 & human\\_gut\\\\\n",
+ "\t MSM9VZNH\\_TR & PRJNA398089 & SRR5963927 & PRJNA398089 & SRR5962906 & human\\_gut\\\\\n",
+ "\t HSMA33RX\\_TR & PRJNA398089 & SRR5963928 & PRJNA398089 & SRR5962885 & human\\_gut\\\\\n",
+ "\t PSM6XBQY\\_TR & PRJNA398089 & SRR5963929 & PRJNA398089 & SRR5962895 & human\\_gut\\\\\n",
+ "\t CSM79HLA\\_TR & PRJNA398089 & SRR5963930 & PRJNA398089 & SRR5962894 & human\\_gut\\\\\n",
+ "\t HSM6XRVC\\_TR & PRJNA398089 & SRR5963931 & PRJNA398089 & SRR5962893 & human\\_gut\\\\\n",
+ "\t HSM5MD6A\\_TR & PRJNA398089 & SRR5963932 & PRJNA398089 & SRR5962891 & human\\_gut\\\\\n",
+ "\t PSM6XBRK\\_TR & PRJNA398089 & SRR5963933 & PRJNA398089 & SRR5962890 & human\\_gut\\\\\n",
+ "\t CSM67UEW\\_TR & PRJNA398089 & SRR5963934 & PRJNA398089 & SRR5962889 & human\\_gut\\\\\n",
+ "\t HSM67VEM\\_TR & PRJNA398089 & SRR5963935 & PRJNA398089 & SRR5962892 & human\\_gut\\\\\n",
+ "\t CSM67UDR\\_TR & PRJNA398089 & SRR5963936 & PRJNA398089 & SRR5962888 & human\\_gut\\\\\n",
+ "\t PSM6XBSU\\_TR & PRJNA398089 & SRR5963937 & PRJNA398089 & SRR5962897 & human\\_gut\\\\\n",
+ "\t MSM79HF9\\_TR & PRJNA398089 & SRR5963938 & PRJNA398089 & SRR5962898 & human\\_gut\\\\\n",
+ "\t PSM6XBRK & PRJNA398089 & SRR5963939 & PRJNA398089 & SRR5935960 & human\\_gut\\\\\n",
+ "\t PSM6XBQY & PRJNA398089 & SRR5963940 & PRJNA398089 & SRR5935809 & human\\_gut\\\\\n",
+ "\t MSMA26EP & PRJNA398089 & SRR5963941 & PRJNA398089 & SRR5946720 & human\\_gut\\\\\n",
+ "\t MSM9VZOW & PRJNA398089 & SRR5963942 & PRJNA398089 & SRR5946733 & human\\_gut\\\\\n",
+ "\t PSM6XBQU & PRJNA398089 & SRR5963943 & PRJNA398089 & SRR5936007 & human\\_gut\\\\\n",
+ "\t MSMAPC5Z & PRJNA398089 & SRR5963944 & PRJNA398089 & SRR5946832 & human\\_gut\\\\\n",
+ "\t MSM6J2M3 & PRJNA398089 & SRR5963945 & PRJNA398089 & SRR5935963 & human\\_gut\\\\\n",
+ "\t MSM6J2K6 & PRJNA398089 & SRR5963946 & PRJNA398089 & SRR5935938 & human\\_gut\\\\\n",
+ "\t MSM6J2Q1 & PRJNA398089 & SRR5963947 & PRJNA398089 & SRR5936013 & human\\_gut\\\\\n",
+ "\t MSM6J2PS & PRJNA398089 & SRR5963948 & PRJNA398089 & SRR5936231 & human\\_gut\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 761 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| CSM67UC6 | PRJNA398089 | SRR5949109 | PRJNA398089 | SRR5936217 | human_gut |\n",
+ "| CSM67UDY | PRJNA398089 | SRR5949110 | PRJNA398089 | SRR5936216 | human_gut |\n",
+ "| HSM67VI9 | PRJNA398089 | SRR5949111 | PRJNA398089 | SRR5936212 | human_gut |\n",
+ "| HSM6XRTQ | PRJNA398089 | SRR5949112 | PRJNA398089 | SRR5936211 | human_gut |\n",
+ "| CSM79HHM | PRJNA398089 | SRR5949113 | PRJNA398089 | SRR5946811 | human_gut |\n",
+ "| CSM79HIR | PRJNA398089 | SRR5949114 | PRJNA398089 | SRR5936215 | human_gut |\n",
+ "| CSM79HJO | PRJNA398089 | SRR5949115 | PRJNA398089 | SRR5947089 | human_gut |\n",
+ "| HSM67VEI | PRJNA398089 | SRR5949116 | PRJNA398089 | SRR5936210 | human_gut |\n",
+ "| MSM79H9K | PRJNA398089 | SRR5949117 | PRJNA398089 | SRR5935975 | human_gut |\n",
+ "| MSM79HAH | PRJNA398089 | SRR5949118 | PRJNA398089 | SRR5935976 | human_gut |\n",
+ "| HSM7J4L5 | PRJNA398089 | SRR5949119 | PRJNA398089 | SRR5935953 | human_gut |\n",
+ "| MSM79H7E | PRJNA398089 | SRR5949120 | PRJNA398089 | SRR5935950 | human_gut |\n",
+ "| PSM7J17Z | PRJNA398089 | SRR5949121 | PRJNA398089 | SRR5935951 | human_gut |\n",
+ "| PSM7J18G | PRJNA398089 | SRR5949122 | PRJNA398089 | SRR5935952 | human_gut |\n",
+ "| MSM79HDI | PRJNA398089 | SRR5949123 | PRJNA398089 | SRR5935954 | human_gut |\n",
+ "| HSM7J4JH | PRJNA398089 | SRR5949124 | PRJNA398089 | SRR5935955 | human_gut |\n",
+ "| HSM7J4HU | PRJNA398089 | SRR5949125 | PRJNA398089 | SRR5935956 | human_gut |\n",
+ "| PSM7J1CI | PRJNA398089 | SRR5949126 | PRJNA398089 | SRR5935957 | human_gut |\n",
+ "| HSM7CZ36 | PRJNA398089 | SRR5949127 | PRJNA398089 | SRR5936016 | human_gut |\n",
+ "| CSM7KOJW | PRJNA398089 | SRR5949128 | PRJNA398089 | SRR5947102 | human_gut |\n",
+ "| MSM79HDM | PRJNA398089 | SRR5949129 | PRJNA398089 | SRR5936018 | human_gut |\n",
+ "| PSM6XBTP | PRJNA398089 | SRR5949130 | PRJNA398089 | SRR5936066 | human_gut |\n",
+ "| MSM79HF9 | PRJNA398089 | SRR5949131 | PRJNA398089 | SRR5936064 | human_gut |\n",
+ "| CSM79HQF | PRJNA398089 | SRR5949132 | PRJNA398089 | SRR5936119 | human_gut |\n",
+ "| MSM79H9A | PRJNA398089 | SRR5949133 | PRJNA398089 | SRR5936118 | human_gut |\n",
+ "| HSM7CZ3E | PRJNA398089 | SRR5949134 | PRJNA398089 | SRR5936121 | human_gut |\n",
+ "| CSM79HP4 | PRJNA398089 | SRR5949135 | PRJNA398089 | SRR5946645 | human_gut |\n",
+ "| HSM7CYY7 | PRJNA398089 | SRR5949136 | PRJNA398089 | SRR5936117 | human_gut |\n",
+ "| CSM79HLC | PRJNA398089 | SRR5949137 | PRJNA398089 | SRR5936176 | human_gut |\n",
+ "| MSM79H5K | PRJNA398089 | SRR5949138 | PRJNA398089 | SRR5936132 | human_gut |\n",
+ "| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |\n",
+ "| PSM6XBSM | PRJNA398089 | SRR5963918 | PRJNA398089 | SRR5950713 | human_gut |\n",
+ "| MSM79HDQ_TR | PRJNA398089 | SRR5963920 | PRJNA398089 | SRR5962901 | human_gut |\n",
+ "| MSM79HDG_TR | PRJNA398089 | SRR5963921 | PRJNA398089 | SRR5962900 | human_gut |\n",
+ "| MSM9VZEK_TR | PRJNA398089 | SRR5963922 | PRJNA398089 | SRR5962903 | human_gut |\n",
+ "| MSM9VZMA_TR | PRJNA398089 | SRR5963923 | PRJNA398089 | SRR5962902 | human_gut |\n",
+ "| PSMA265J_TR | PRJNA398089 | SRR5963924 | PRJNA398089 | SRR5962904 | human_gut |\n",
+ "| MSMA26AZ_TR | PRJNA398089 | SRR5963925 | PRJNA398089 | SRR5962908 | human_gut |\n",
+ "| HSMA33LH_TR | PRJNA398089 | SRR5963926 | PRJNA398089 | SRR5962907 | human_gut |\n",
+ "| MSM9VZNH_TR | PRJNA398089 | SRR5963927 | PRJNA398089 | SRR5962906 | human_gut |\n",
+ "| HSMA33RX_TR | PRJNA398089 | SRR5963928 | PRJNA398089 | SRR5962885 | human_gut |\n",
+ "| PSM6XBQY_TR | PRJNA398089 | SRR5963929 | PRJNA398089 | SRR5962895 | human_gut |\n",
+ "| CSM79HLA_TR | PRJNA398089 | SRR5963930 | PRJNA398089 | SRR5962894 | human_gut |\n",
+ "| HSM6XRVC_TR | PRJNA398089 | SRR5963931 | PRJNA398089 | SRR5962893 | human_gut |\n",
+ "| HSM5MD6A_TR | PRJNA398089 | SRR5963932 | PRJNA398089 | SRR5962891 | human_gut |\n",
+ "| PSM6XBRK_TR | PRJNA398089 | SRR5963933 | PRJNA398089 | SRR5962890 | human_gut |\n",
+ "| CSM67UEW_TR | PRJNA398089 | SRR5963934 | PRJNA398089 | SRR5962889 | human_gut |\n",
+ "| HSM67VEM_TR | PRJNA398089 | SRR5963935 | PRJNA398089 | SRR5962892 | human_gut |\n",
+ "| CSM67UDR_TR | PRJNA398089 | SRR5963936 | PRJNA398089 | SRR5962888 | human_gut |\n",
+ "| PSM6XBSU_TR | PRJNA398089 | SRR5963937 | PRJNA398089 | SRR5962897 | human_gut |\n",
+ "| MSM79HF9_TR | PRJNA398089 | SRR5963938 | PRJNA398089 | SRR5962898 | human_gut |\n",
+ "| PSM6XBRK | PRJNA398089 | SRR5963939 | PRJNA398089 | SRR5935960 | human_gut |\n",
+ "| PSM6XBQY | PRJNA398089 | SRR5963940 | PRJNA398089 | SRR5935809 | human_gut |\n",
+ "| MSMA26EP | PRJNA398089 | SRR5963941 | PRJNA398089 | SRR5946720 | human_gut |\n",
+ "| MSM9VZOW | PRJNA398089 | SRR5963942 | PRJNA398089 | SRR5946733 | human_gut |\n",
+ "| PSM6XBQU | PRJNA398089 | SRR5963943 | PRJNA398089 | SRR5936007 | human_gut |\n",
+ "| MSMAPC5Z | PRJNA398089 | SRR5963944 | PRJNA398089 | SRR5946832 | human_gut |\n",
+ "| MSM6J2M3 | PRJNA398089 | SRR5963945 | PRJNA398089 | SRR5935963 | human_gut |\n",
+ "| MSM6J2K6 | PRJNA398089 | SRR5963946 | PRJNA398089 | SRR5935938 | human_gut |\n",
+ "| MSM6J2Q1 | PRJNA398089 | SRR5963947 | PRJNA398089 | SRR5936013 | human_gut |\n",
+ "| MSM6J2PS | PRJNA398089 | SRR5963948 | PRJNA398089 | SRR5936231 | human_gut |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 CSM67UC6 PRJNA398089 SRR5949109 PRJNA398089 \n",
+ "2 CSM67UDY PRJNA398089 SRR5949110 PRJNA398089 \n",
+ "3 HSM67VI9 PRJNA398089 SRR5949111 PRJNA398089 \n",
+ "4 HSM6XRTQ PRJNA398089 SRR5949112 PRJNA398089 \n",
+ "5 CSM79HHM PRJNA398089 SRR5949113 PRJNA398089 \n",
+ "6 CSM79HIR PRJNA398089 SRR5949114 PRJNA398089 \n",
+ "7 CSM79HJO PRJNA398089 SRR5949115 PRJNA398089 \n",
+ "8 HSM67VEI PRJNA398089 SRR5949116 PRJNA398089 \n",
+ "9 MSM79H9K PRJNA398089 SRR5949117 PRJNA398089 \n",
+ "10 MSM79HAH PRJNA398089 SRR5949118 PRJNA398089 \n",
+ "11 HSM7J4L5 PRJNA398089 SRR5949119 PRJNA398089 \n",
+ "12 MSM79H7E PRJNA398089 SRR5949120 PRJNA398089 \n",
+ "13 PSM7J17Z PRJNA398089 SRR5949121 PRJNA398089 \n",
+ "14 PSM7J18G PRJNA398089 SRR5949122 PRJNA398089 \n",
+ "15 MSM79HDI PRJNA398089 SRR5949123 PRJNA398089 \n",
+ "16 HSM7J4JH PRJNA398089 SRR5949124 PRJNA398089 \n",
+ "17 HSM7J4HU PRJNA398089 SRR5949125 PRJNA398089 \n",
+ "18 PSM7J1CI PRJNA398089 SRR5949126 PRJNA398089 \n",
+ "19 HSM7CZ36 PRJNA398089 SRR5949127 PRJNA398089 \n",
+ "20 CSM7KOJW PRJNA398089 SRR5949128 PRJNA398089 \n",
+ "21 MSM79HDM PRJNA398089 SRR5949129 PRJNA398089 \n",
+ "22 PSM6XBTP PRJNA398089 SRR5949130 PRJNA398089 \n",
+ "23 MSM79HF9 PRJNA398089 SRR5949131 PRJNA398089 \n",
+ "24 CSM79HQF PRJNA398089 SRR5949132 PRJNA398089 \n",
+ "25 MSM79H9A PRJNA398089 SRR5949133 PRJNA398089 \n",
+ "26 HSM7CZ3E PRJNA398089 SRR5949134 PRJNA398089 \n",
+ "27 CSM79HP4 PRJNA398089 SRR5949135 PRJNA398089 \n",
+ "28 HSM7CYY7 PRJNA398089 SRR5949136 PRJNA398089 \n",
+ "29 CSM79HLC PRJNA398089 SRR5949137 PRJNA398089 \n",
+ "30 MSM79H5K PRJNA398089 SRR5949138 PRJNA398089 \n",
+ "⋮ ⋮ ⋮ ⋮ ⋮ \n",
+ "732 PSM6XBSM PRJNA398089 SRR5963918 PRJNA398089 \n",
+ "733 MSM79HDQ_TR PRJNA398089 SRR5963920 PRJNA398089 \n",
+ "734 MSM79HDG_TR PRJNA398089 SRR5963921 PRJNA398089 \n",
+ "735 MSM9VZEK_TR PRJNA398089 SRR5963922 PRJNA398089 \n",
+ "736 MSM9VZMA_TR PRJNA398089 SRR5963923 PRJNA398089 \n",
+ "737 PSMA265J_TR PRJNA398089 SRR5963924 PRJNA398089 \n",
+ "738 MSMA26AZ_TR PRJNA398089 SRR5963925 PRJNA398089 \n",
+ "739 HSMA33LH_TR PRJNA398089 SRR5963926 PRJNA398089 \n",
+ "740 MSM9VZNH_TR PRJNA398089 SRR5963927 PRJNA398089 \n",
+ "741 HSMA33RX_TR PRJNA398089 SRR5963928 PRJNA398089 \n",
+ "742 PSM6XBQY_TR PRJNA398089 SRR5963929 PRJNA398089 \n",
+ "743 CSM79HLA_TR PRJNA398089 SRR5963930 PRJNA398089 \n",
+ "744 HSM6XRVC_TR PRJNA398089 SRR5963931 PRJNA398089 \n",
+ "745 HSM5MD6A_TR PRJNA398089 SRR5963932 PRJNA398089 \n",
+ "746 PSM6XBRK_TR PRJNA398089 SRR5963933 PRJNA398089 \n",
+ "747 CSM67UEW_TR PRJNA398089 SRR5963934 PRJNA398089 \n",
+ "748 HSM67VEM_TR PRJNA398089 SRR5963935 PRJNA398089 \n",
+ "749 CSM67UDR_TR PRJNA398089 SRR5963936 PRJNA398089 \n",
+ "750 PSM6XBSU_TR PRJNA398089 SRR5963937 PRJNA398089 \n",
+ "751 MSM79HF9_TR PRJNA398089 SRR5963938 PRJNA398089 \n",
+ "752 PSM6XBRK PRJNA398089 SRR5963939 PRJNA398089 \n",
+ "753 PSM6XBQY PRJNA398089 SRR5963940 PRJNA398089 \n",
+ "754 MSMA26EP PRJNA398089 SRR5963941 PRJNA398089 \n",
+ "755 MSM9VZOW PRJNA398089 SRR5963942 PRJNA398089 \n",
+ "756 PSM6XBQU PRJNA398089 SRR5963943 PRJNA398089 \n",
+ "757 MSMAPC5Z PRJNA398089 SRR5963944 PRJNA398089 \n",
+ "758 MSM6J2M3 PRJNA398089 SRR5963945 PRJNA398089 \n",
+ "759 MSM6J2K6 PRJNA398089 SRR5963946 PRJNA398089 \n",
+ "760 MSM6J2Q1 PRJNA398089 SRR5963947 PRJNA398089 \n",
+ "761 MSM6J2PS PRJNA398089 SRR5963948 PRJNA398089 \n",
+ " mgx_run_accession sample_type\n",
+ "1 SRR5936217 human_gut \n",
+ "2 SRR5936216 human_gut \n",
+ "3 SRR5936212 human_gut \n",
+ "4 SRR5936211 human_gut \n",
+ "5 SRR5946811 human_gut \n",
+ "6 SRR5936215 human_gut \n",
+ "7 SRR5947089 human_gut \n",
+ "8 SRR5936210 human_gut \n",
+ "9 SRR5935975 human_gut \n",
+ "10 SRR5935976 human_gut \n",
+ "11 SRR5935953 human_gut \n",
+ "12 SRR5935950 human_gut \n",
+ "13 SRR5935951 human_gut \n",
+ "14 SRR5935952 human_gut \n",
+ "15 SRR5935954 human_gut \n",
+ "16 SRR5935955 human_gut \n",
+ "17 SRR5935956 human_gut \n",
+ "18 SRR5935957 human_gut \n",
+ "19 SRR5936016 human_gut \n",
+ "20 SRR5947102 human_gut \n",
+ "21 SRR5936018 human_gut \n",
+ "22 SRR5936066 human_gut \n",
+ "23 SRR5936064 human_gut \n",
+ "24 SRR5936119 human_gut \n",
+ "25 SRR5936118 human_gut \n",
+ "26 SRR5936121 human_gut \n",
+ "27 SRR5946645 human_gut \n",
+ "28 SRR5936117 human_gut \n",
+ "29 SRR5936176 human_gut \n",
+ "30 SRR5936132 human_gut \n",
+ "⋮ ⋮ ⋮ \n",
+ "732 SRR5950713 human_gut \n",
+ "733 SRR5962901 human_gut \n",
+ "734 SRR5962900 human_gut \n",
+ "735 SRR5962903 human_gut \n",
+ "736 SRR5962902 human_gut \n",
+ "737 SRR5962904 human_gut \n",
+ "738 SRR5962908 human_gut \n",
+ "739 SRR5962907 human_gut \n",
+ "740 SRR5962906 human_gut \n",
+ "741 SRR5962885 human_gut \n",
+ "742 SRR5962895 human_gut \n",
+ "743 SRR5962894 human_gut \n",
+ "744 SRR5962893 human_gut \n",
+ "745 SRR5962891 human_gut \n",
+ "746 SRR5962890 human_gut \n",
+ "747 SRR5962889 human_gut \n",
+ "748 SRR5962892 human_gut \n",
+ "749 SRR5962888 human_gut \n",
+ "750 SRR5962897 human_gut \n",
+ "751 SRR5962898 human_gut \n",
+ "752 SRR5935960 human_gut \n",
+ "753 SRR5935809 human_gut \n",
+ "754 SRR5946720 human_gut \n",
+ "755 SRR5946733 human_gut \n",
+ "756 SRR5936007 human_gut \n",
+ "757 SRR5946832 human_gut \n",
+ "758 SRR5935963 human_gut \n",
+ "759 SRR5935938 human_gut \n",
+ "760 SRR5936013 human_gut \n",
+ "761 SRR5936231 human_gut "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+    "# RNA-Seq metatranscriptome runs; experiment_alias with _MTX removed is the pairing key\n",
+    "prjna398089_mtx <- prjna398089 %>%\n",
+    "  filter(library_source == \"METATRANSCRIPTOMIC\", library_strategy == \"RNA-Seq\") %>%\n",
+    "  mutate(sample_name = gsub(\"_MTX\", \"\", experiment_alias)) %>%\n",
+    "  select(sample_name,\n",
+    "         mtx_study_accession = study_accession,\n",
+    "         mtx_run_accession = run_accession)\n",
+    "\n",
+    "# WGS metagenome runs; experiment_alias with _MGX removed is the pairing key\n",
+    "prjna398089_mgx <- prjna398089 %>%\n",
+    "  filter(library_source == \"METAGENOMIC\", library_strategy == \"WGS\") %>%\n",
+    "  mutate(sample_name = gsub(\"_MGX\", \"\", experiment_alias)) %>%\n",
+    "  select(sample_name,\n",
+    "         mgx_study_accession = study_accession,\n",
+    "         mgx_run_accession = run_accession)\n",
+    "\n",
+    "prjna398089_mtx_vs_mgx <- prjna398089_mtx %>%  # keep only samples that have both data types\n",
+    "  inner_join(prjna398089_mgx, by = \"sample_name\") %>% mutate(sample_type = \"human_gut\")\n",
+    "\n",
+    "prjna398089_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f12fe413",
+ "metadata": {},
+ "source": [
+ "## PRJNA632343"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "id": "ce6d4776",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna632343 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA632343&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ "                        show_col_types = FALSE)  # FALSE spelled out: F is a plain variable that can be reassigned"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "id": "acd04061",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 4 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tAmar_Carlsberg-38I-DV129-2-N2 | PRJNA632343 | SRR11781625 | PRJNA632343 | SRR11781636 | snail_gut |
\n",
+ "\tAmar_Carlsberg-38I-DV131-6-Intestine | PRJNA632343 | SRR11781640 | PRJNA632343 | SRR11781639 | snail_gut |
\n",
+ "\tAmar_Carlsberg-38I-DV129-15-Intestine | PRJNA632343 | SRR11781643 | PRJNA632343 | SRR11781642 | snail_gut |
\n",
+ "\tAmar_Carlsberg-38I-DV129-5-Intestine | PRJNA632343 | SRR11781646 | PRJNA632343 | SRR11781645 | snail_gut |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 4 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t Amar\\_Carlsberg-38I-DV129-2-N2 & PRJNA632343 & SRR11781625 & PRJNA632343 & SRR11781636 & snail\\_gut\\\\\n",
+ "\t Amar\\_Carlsberg-38I-DV131-6-Intestine & PRJNA632343 & SRR11781640 & PRJNA632343 & SRR11781639 & snail\\_gut\\\\\n",
+ "\t Amar\\_Carlsberg-38I-DV129-15-Intestine & PRJNA632343 & SRR11781643 & PRJNA632343 & SRR11781642 & snail\\_gut\\\\\n",
+ "\t Amar\\_Carlsberg-38I-DV129-5-Intestine & PRJNA632343 & SRR11781646 & PRJNA632343 & SRR11781645 & snail\\_gut\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 4 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| Amar_Carlsberg-38I-DV129-2-N2 | PRJNA632343 | SRR11781625 | PRJNA632343 | SRR11781636 | snail_gut |\n",
+ "| Amar_Carlsberg-38I-DV131-6-Intestine | PRJNA632343 | SRR11781640 | PRJNA632343 | SRR11781639 | snail_gut |\n",
+ "| Amar_Carlsberg-38I-DV129-15-Intestine | PRJNA632343 | SRR11781643 | PRJNA632343 | SRR11781642 | snail_gut |\n",
+ "| Amar_Carlsberg-38I-DV129-5-Intestine | PRJNA632343 | SRR11781646 | PRJNA632343 | SRR11781645 | snail_gut |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession\n",
+ "1 Amar_Carlsberg-38I-DV129-2-N2 PRJNA632343 SRR11781625 \n",
+ "2 Amar_Carlsberg-38I-DV131-6-Intestine PRJNA632343 SRR11781640 \n",
+ "3 Amar_Carlsberg-38I-DV129-15-Intestine PRJNA632343 SRR11781643 \n",
+ "4 Amar_Carlsberg-38I-DV129-5-Intestine PRJNA632343 SRR11781646 \n",
+ " mgx_study_accession mgx_run_accession sample_type\n",
+ "1 PRJNA632343 SRR11781636 snail_gut \n",
+ "2 PRJNA632343 SRR11781639 snail_gut \n",
+ "3 PRJNA632343 SRR11781642 snail_gut \n",
+ "4 PRJNA632343 SRR11781645 snail_gut "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Metatranscriptome runs; both spellings of the Illumina meta-RNA suffix are stripped from experiment_alias.\n",
+ "prjna632343_mtx <- prjna632343 %>%\n",
+ "  filter(library_source == \"METATRANSCRIPTOMIC\", library_strategy == \"RNA-Seq\") %>%\n",
+ "  mutate(sample_name = gsub(\"-Illumina-metaRNA\", \"\", experiment_alias),\n",
+ "         sample_name = gsub(\"-Illumina-meta-RNA\", \"\", sample_name)) %>%\n",
+ "  select(sample_name,\n",
+ "         mtx_study_accession = study_accession, mtx_run_accession = run_accession)\n",
+ "\n",
+ "# Metagenome runs, Illumina platform only; the matching meta-DNA suffixes are stripped.\n",
+ "prjna632343_mgx <- prjna632343 %>%\n",
+ "  filter(library_source == \"METAGENOMIC\", library_strategy == \"WGS\",\n",
+ "         instrument_platform == \"ILLUMINA\") %>%\n",
+ "  mutate(sample_name = gsub(\"-Illumina-metaDNA\", \"\", experiment_alias),\n",
+ "         sample_name = gsub(\"-Illumina-meta-DNA\", \"\", sample_name)) %>%\n",
+ "  select(sample_name,\n",
+ "         mgx_study_accession = study_accession, mgx_run_accession = run_accession)\n",
+ "\n",
+ "# Keep only samples present in both tables and label them with their sample type.\n",
+ "prjna632343_mtx_vs_mgx <- prjna632343_mtx %>%\n",
+ "  inner_join(prjna632343_mgx, by = \"sample_name\") %>%\n",
+ "  mutate(sample_type = \"snail_gut\")\n",
+ "\n",
+ "prjna632343_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "941acc7a",
+ "metadata": {},
+ "source": [
+ "## PRJNA242360"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "id": "5ce910c4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna242360 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA242360&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ "                        show_col_types = FALSE)  # FALSE spelled out: F is a plain variable that can be reassigned"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "id": "d231a938",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 4 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tBATS_256_C3_1B | PRJNA242360 | SRR1230757 | PRJNA242360 | SRR1230754 | ocean |
\n",
+ "\tBATS_261_C2_B2 | PRJNA242360 | SRR1230758 | PRJNA242360 | SRR1230755 | ocean |
\n",
+ "\tBATS_261_C8_F2 | PRJNA242360 | SRR1230759 | PRJNA242360 | SRR1230756 | ocean |
\n",
+ "\tBATS_256_C10_5A | PRJNA242360 | SRR1238005 | PRJNA242360 | SRR1230729 | ocean |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 4 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t BATS\\_256\\_C3\\_1B & PRJNA242360 & SRR1230757 & PRJNA242360 & SRR1230754 & ocean\\\\\n",
+ "\t BATS\\_261\\_C2\\_B2 & PRJNA242360 & SRR1230758 & PRJNA242360 & SRR1230755 & ocean\\\\\n",
+ "\t BATS\\_261\\_C8\\_F2 & PRJNA242360 & SRR1230759 & PRJNA242360 & SRR1230756 & ocean\\\\\n",
+ "\t BATS\\_256\\_C10\\_5A & PRJNA242360 & SRR1238005 & PRJNA242360 & SRR1230729 & ocean\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 4 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| BATS_256_C3_1B | PRJNA242360 | SRR1230757 | PRJNA242360 | SRR1230754 | ocean |\n",
+ "| BATS_261_C2_B2 | PRJNA242360 | SRR1230758 | PRJNA242360 | SRR1230755 | ocean |\n",
+ "| BATS_261_C8_F2 | PRJNA242360 | SRR1230759 | PRJNA242360 | SRR1230756 | ocean |\n",
+ "| BATS_256_C10_5A | PRJNA242360 | SRR1238005 | PRJNA242360 | SRR1230729 | ocean |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 BATS_256_C3_1B PRJNA242360 SRR1230757 PRJNA242360 \n",
+ "2 BATS_261_C2_B2 PRJNA242360 SRR1230758 PRJNA242360 \n",
+ "3 BATS_261_C8_F2 PRJNA242360 SRR1230759 PRJNA242360 \n",
+ "4 BATS_256_C10_5A PRJNA242360 SRR1238005 PRJNA242360 \n",
+ " mgx_run_accession sample_type\n",
+ "1 SRR1230754 ocean \n",
+ "2 SRR1230755 ocean \n",
+ "3 SRR1230756 ocean \n",
+ "4 SRR1230729 ocean "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Metatranscriptome runs (Illumina); sample_name is experiment_title with platform text and _MG/_MT/_R1R2 tags removed.\n",
+ "prjna242360_mtx <- prjna242360 %>%\n",
+ "  filter(library_source == \"METATRANSCRIPTOMIC\", instrument_platform == \"ILLUMINA\") %>%\n",
+ "  mutate(sample_name = gsub(\"Illumina HiSeq 2500 sequencing; \", \"\", experiment_title),\n",
+ "         sample_name = gsub(\"_M[GT]\", \"\", sample_name),\n",
+ "         sample_name = gsub(\"_R1R2\", \"\", sample_name),\n",
+ "         sample_name = gsub(\"C2_2\", \"C2_B2\", sample_name),\n",
+ "         sample_name = gsub(\"C8_2\", \"C8_F2\", sample_name)) %>%\n",
+ "  select(sample_name,\n",
+ "         mtx_study_accession = study_accession, mtx_run_accession = run_accession)\n",
+ "\n",
+ "# Metagenome runs (Illumina WGS), cleaned with the same substitutions so the names line up for the join.\n",
+ "prjna242360_mgx <- prjna242360 %>%\n",
+ "  filter(library_source == \"METAGENOMIC\", library_strategy == \"WGS\",\n",
+ "         instrument_platform == \"ILLUMINA\") %>%\n",
+ "  mutate(sample_name = gsub(\"Illumina HiSeq 2500 sequencing; \", \"\", experiment_title),\n",
+ "         sample_name = gsub(\"_M[GT]\", \"\", sample_name),\n",
+ "         sample_name = gsub(\"_R1R2\", \"\", sample_name),\n",
+ "         sample_name = gsub(\"C2_2\", \"C2_B2\", sample_name),\n",
+ "         sample_name = gsub(\"C8_2\", \"C8_F2\", sample_name)) %>%\n",
+ "  select(sample_name,\n",
+ "         mgx_study_accession = study_accession, mgx_run_accession = run_accession)\n",
+ "\n",
+ "# Keep only samples present in both tables and label them with their sample type.\n",
+ "prjna242360_mtx_vs_mgx <- prjna242360_mtx %>%\n",
+ "  inner_join(prjna242360_mgx, by = \"sample_name\") %>%\n",
+ "  mutate(sample_type = \"ocean\")\n",
+ "\n",
+ "prjna242360_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0f3958d4",
+ "metadata": {},
+ "source": [
+ "## PRJEB12234"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "id": "7b4cd065",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjeb12234 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJEB12234&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ "                       show_col_types = FALSE)  # FALSE spelled out: F is a plain variable that can be reassigned"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "id": "a965057c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 13 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tES_08072017_CAT2012July | PRJEB12234 | ERR2088997 | PRJEB12234 | ERR2094669 | ocean |
\n",
+ "\tES_08072017_CAT2012October | PRJEB12234 | ERR2088998 | PRJEB12234 | ERR2094670 | ocean |
\n",
+ "\tES_08072017_CAT2013January | PRJEB12234 | ERR2088999 | PRJEB12234 | ERR2094671 | ocean |
\n",
+ "\tES_08072017_CAT2013April | PRJEB12234 | ERR2089000 | PRJEB12234 | ERR2094672 | ocean |
\n",
+ "\tES_08072017_POLA2012July | PRJEB12234 | ERR2089001 | PRJEB12234 | ERR2094673 | ocean |
\n",
+ "\tES_08072017_POLA2012October | PRJEB12234 | ERR2089002 | PRJEB12234 | ERR2094674 | ocean |
\n",
+ "\tES_08072017_POLA2013January | PRJEB12234 | ERR2089003 | PRJEB12234 | ERR2094675 | ocean |
\n",
+ "\tES_08072017_POLA2013April | PRJEB12234 | ERR2089004 | PRJEB12234 | ERR2094676 | ocean |
\n",
+ "\tES_08072017_SPOT2012July | PRJEB12234 | ERR2089005 | PRJEB12234 | ERR2094677 | ocean |
\n",
+ "\tES_08072017_SPOT2012October | PRJEB12234 | ERR2089006 | PRJEB12234 | ERR2094678 | ocean |
\n",
+ "\tES_08072017_SPOT2013January | PRJEB12234 | ERR2089007 | PRJEB12234 | ERR2094679 | ocean |
\n",
+ "\tES_08072017_SPOT2013April | PRJEB12234 | ERR2089008 | PRJEB12234 | ERR2094680 | ocean |
\n",
+ "\tES_08072017_neg_reads | PRJEB12234 | ERR2089009 | PRJEB12234 | ERR2097133 | ocean |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 13 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t ES\\_08072017\\_CAT2012July & PRJEB12234 & ERR2088997 & PRJEB12234 & ERR2094669 & ocean\\\\\n",
+ "\t ES\\_08072017\\_CAT2012October & PRJEB12234 & ERR2088998 & PRJEB12234 & ERR2094670 & ocean\\\\\n",
+ "\t ES\\_08072017\\_CAT2013January & PRJEB12234 & ERR2088999 & PRJEB12234 & ERR2094671 & ocean\\\\\n",
+ "\t ES\\_08072017\\_CAT2013April & PRJEB12234 & ERR2089000 & PRJEB12234 & ERR2094672 & ocean\\\\\n",
+ "\t ES\\_08072017\\_POLA2012July & PRJEB12234 & ERR2089001 & PRJEB12234 & ERR2094673 & ocean\\\\\n",
+ "\t ES\\_08072017\\_POLA2012October & PRJEB12234 & ERR2089002 & PRJEB12234 & ERR2094674 & ocean\\\\\n",
+ "\t ES\\_08072017\\_POLA2013January & PRJEB12234 & ERR2089003 & PRJEB12234 & ERR2094675 & ocean\\\\\n",
+ "\t ES\\_08072017\\_POLA2013April & PRJEB12234 & ERR2089004 & PRJEB12234 & ERR2094676 & ocean\\\\\n",
+ "\t ES\\_08072017\\_SPOT2012July & PRJEB12234 & ERR2089005 & PRJEB12234 & ERR2094677 & ocean\\\\\n",
+ "\t ES\\_08072017\\_SPOT2012October & PRJEB12234 & ERR2089006 & PRJEB12234 & ERR2094678 & ocean\\\\\n",
+ "\t ES\\_08072017\\_SPOT2013January & PRJEB12234 & ERR2089007 & PRJEB12234 & ERR2094679 & ocean\\\\\n",
+ "\t ES\\_08072017\\_SPOT2013April & PRJEB12234 & ERR2089008 & PRJEB12234 & ERR2094680 & ocean\\\\\n",
+ "\t ES\\_08072017\\_neg\\_reads & PRJEB12234 & ERR2089009 & PRJEB12234 & ERR2097133 & ocean\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 13 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| ES_08072017_CAT2012July | PRJEB12234 | ERR2088997 | PRJEB12234 | ERR2094669 | ocean |\n",
+ "| ES_08072017_CAT2012October | PRJEB12234 | ERR2088998 | PRJEB12234 | ERR2094670 | ocean |\n",
+ "| ES_08072017_CAT2013January | PRJEB12234 | ERR2088999 | PRJEB12234 | ERR2094671 | ocean |\n",
+ "| ES_08072017_CAT2013April | PRJEB12234 | ERR2089000 | PRJEB12234 | ERR2094672 | ocean |\n",
+ "| ES_08072017_POLA2012July | PRJEB12234 | ERR2089001 | PRJEB12234 | ERR2094673 | ocean |\n",
+ "| ES_08072017_POLA2012October | PRJEB12234 | ERR2089002 | PRJEB12234 | ERR2094674 | ocean |\n",
+ "| ES_08072017_POLA2013January | PRJEB12234 | ERR2089003 | PRJEB12234 | ERR2094675 | ocean |\n",
+ "| ES_08072017_POLA2013April | PRJEB12234 | ERR2089004 | PRJEB12234 | ERR2094676 | ocean |\n",
+ "| ES_08072017_SPOT2012July | PRJEB12234 | ERR2089005 | PRJEB12234 | ERR2094677 | ocean |\n",
+ "| ES_08072017_SPOT2012October | PRJEB12234 | ERR2089006 | PRJEB12234 | ERR2094678 | ocean |\n",
+ "| ES_08072017_SPOT2013January | PRJEB12234 | ERR2089007 | PRJEB12234 | ERR2094679 | ocean |\n",
+ "| ES_08072017_SPOT2013April | PRJEB12234 | ERR2089008 | PRJEB12234 | ERR2094680 | ocean |\n",
+ "| ES_08072017_neg_reads | PRJEB12234 | ERR2089009 | PRJEB12234 | ERR2097133 | ocean |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession\n",
+ "1 ES_08072017_CAT2012July PRJEB12234 ERR2088997 \n",
+ "2 ES_08072017_CAT2012October PRJEB12234 ERR2088998 \n",
+ "3 ES_08072017_CAT2013January PRJEB12234 ERR2088999 \n",
+ "4 ES_08072017_CAT2013April PRJEB12234 ERR2089000 \n",
+ "5 ES_08072017_POLA2012July PRJEB12234 ERR2089001 \n",
+ "6 ES_08072017_POLA2012October PRJEB12234 ERR2089002 \n",
+ "7 ES_08072017_POLA2013January PRJEB12234 ERR2089003 \n",
+ "8 ES_08072017_POLA2013April PRJEB12234 ERR2089004 \n",
+ "9 ES_08072017_SPOT2012July PRJEB12234 ERR2089005 \n",
+ "10 ES_08072017_SPOT2012October PRJEB12234 ERR2089006 \n",
+ "11 ES_08072017_SPOT2013January PRJEB12234 ERR2089007 \n",
+ "12 ES_08072017_SPOT2013April PRJEB12234 ERR2089008 \n",
+ "13 ES_08072017_neg_reads PRJEB12234 ERR2089009 \n",
+ " mgx_study_accession mgx_run_accession sample_type\n",
+ "1 PRJEB12234 ERR2094669 ocean \n",
+ "2 PRJEB12234 ERR2094670 ocean \n",
+ "3 PRJEB12234 ERR2094671 ocean \n",
+ "4 PRJEB12234 ERR2094672 ocean \n",
+ "5 PRJEB12234 ERR2094673 ocean \n",
+ "6 PRJEB12234 ERR2094674 ocean \n",
+ "7 PRJEB12234 ERR2094675 ocean \n",
+ "8 PRJEB12234 ERR2094676 ocean \n",
+ "9 PRJEB12234 ERR2094677 ocean \n",
+ "10 PRJEB12234 ERR2094678 ocean \n",
+ "11 PRJEB12234 ERR2094679 ocean \n",
+ "12 PRJEB12234 ERR2094680 ocean \n",
+ "13 PRJEB12234 ERR2097133 ocean "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Metatranscriptome runs (selected with library_strategy WGS in this project); _MT is stripped from sample_alias.\n",
+ "prjeb12234_mtx <- prjeb12234 %>%\n",
+ "  filter(library_source == \"METATRANSCRIPTOMIC\", library_strategy == \"WGS\") %>%\n",
+ "  mutate(sample_name = gsub(\"_MT\", \"\", sample_alias)) %>%\n",
+ "  select(sample_name,\n",
+ "         mtx_study_accession = study_accession, mtx_run_accession = run_accession)\n",
+ "\n",
+ "# Metagenome runs; _MG is stripped from sample_alias.\n",
+ "prjeb12234_mgx <- prjeb12234 %>%\n",
+ "  filter(library_source == \"METAGENOMIC\", library_strategy == \"WGS\") %>%\n",
+ "  mutate(sample_name = gsub(\"_MG\", \"\", sample_alias)) %>%\n",
+ "  select(sample_name,\n",
+ "         mgx_study_accession = study_accession, mgx_run_accession = run_accession)\n",
+ "\n",
+ "# Pair on sample_name. NOTE(review): ES_08072017_neg_reads looks like a negative control - confirm it belongs in the ocean set.\n",
+ "prjeb12234_mtx_vs_mgx <- prjeb12234_mtx %>%\n",
+ "  inner_join(prjeb12234_mgx, by = \"sample_name\") %>%\n",
+ "  mutate(sample_type = \"ocean\")\n",
+ "\n",
+ "prjeb12234_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "419d1e0e",
+ "metadata": {},
+ "source": [
+ "## PRJNA340003"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "id": "308ec27f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prjna340003 <- read_tsv(\"https://www.ebi.ac.uk/ena/portal/api/filereport?accession=PRJNA340003&result=read_run&fields=study_accession,secondary_study_accession,sample_accession,secondary_sample_accession,experiment_accession,run_accession,submission_accession,tax_id,scientific_name,instrument_platform,instrument_model,library_name,nominal_length,library_layout,library_strategy,library_source,library_selection,read_count,base_count,center_name,first_public,last_updated,experiment_title,study_title,study_alias,experiment_alias,run_alias,fastq_bytes,fastq_md5,fastq_ftp,fastq_aspera,fastq_galaxy,submitted_bytes,submitted_md5,submitted_ftp,submitted_aspera,submitted_galaxy,submitted_format,sra_bytes,sra_md5,sra_ftp,sra_aspera,sra_galaxy,cram_index_ftp,cram_index_aspera,cram_index_galaxy,sample_alias,broker_name,sample_title,nominal_sdev,first_created&format=tsv&download=true&limit=0\",\n",
+ "                        show_col_types = FALSE)  # FALSE spelled out: F is a plain variable that can be reassigned"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "id": "b9428e0a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 6 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tE2A | PRJNA340003 | SRR4342131 | PRJNA340003 | SRR4342135 | ocean |
\n",
+ "\tE4 | PRJNA340003 | SRR4342132 | PRJNA340003 | SRR4342136 | ocean |
\n",
+ "\tD1 | PRJNA340003 | SRR4342137 | PRJNA340003 | SRR4342129 | ocean |
\n",
+ "\tD2 | PRJNA340003 | SRR4342138 | PRJNA340003 | SRR4342130 | ocean |
\n",
+ "\tD3 | PRJNA340003 | SRR4342139 | PRJNA340003 | SRR4342133 | ocean |
\n",
+ "\tE2 | PRJNA340003 | SRR4342140 | PRJNA340003 | SRR4342134 | ocean |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 6 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t E2A & PRJNA340003 & SRR4342131 & PRJNA340003 & SRR4342135 & ocean\\\\\n",
+ "\t E4 & PRJNA340003 & SRR4342132 & PRJNA340003 & SRR4342136 & ocean\\\\\n",
+ "\t D1 & PRJNA340003 & SRR4342137 & PRJNA340003 & SRR4342129 & ocean\\\\\n",
+ "\t D2 & PRJNA340003 & SRR4342138 & PRJNA340003 & SRR4342130 & ocean\\\\\n",
+ "\t D3 & PRJNA340003 & SRR4342139 & PRJNA340003 & SRR4342133 & ocean\\\\\n",
+ "\t E2 & PRJNA340003 & SRR4342140 & PRJNA340003 & SRR4342134 & ocean\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 6 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| E2A | PRJNA340003 | SRR4342131 | PRJNA340003 | SRR4342135 | ocean |\n",
+ "| E4 | PRJNA340003 | SRR4342132 | PRJNA340003 | SRR4342136 | ocean |\n",
+ "| D1 | PRJNA340003 | SRR4342137 | PRJNA340003 | SRR4342129 | ocean |\n",
+ "| D2 | PRJNA340003 | SRR4342138 | PRJNA340003 | SRR4342130 | ocean |\n",
+ "| D3 | PRJNA340003 | SRR4342139 | PRJNA340003 | SRR4342133 | ocean |\n",
+ "| E2 | PRJNA340003 | SRR4342140 | PRJNA340003 | SRR4342134 | ocean |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 E2A PRJNA340003 SRR4342131 PRJNA340003 \n",
+ "2 E4 PRJNA340003 SRR4342132 PRJNA340003 \n",
+ "3 D1 PRJNA340003 SRR4342137 PRJNA340003 \n",
+ "4 D2 PRJNA340003 SRR4342138 PRJNA340003 \n",
+ "5 D3 PRJNA340003 SRR4342139 PRJNA340003 \n",
+ "6 E2 PRJNA340003 SRR4342140 PRJNA340003 \n",
+ " mgx_run_accession sample_type\n",
+ "1 SRR4342135 ocean \n",
+ "2 SRR4342136 ocean \n",
+ "3 SRR4342129 ocean \n",
+ "4 SRR4342130 ocean \n",
+ "5 SRR4342133 ocean \n",
+ "6 SRR4342134 ocean "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Split runs by library_source; sample_name is sample_alias with its :RNA or :DNA tag removed.\n",
+ "prjna340003_mtx <- prjna340003 %>%\n",
+ "  filter(library_source == \"METATRANSCRIPTOMIC\") %>%\n",
+ "  mutate(sample_name = gsub(\":RNA\", \"\", sample_alias)) %>%\n",
+ "  select(sample_name,\n",
+ "         mtx_study_accession = study_accession, mtx_run_accession = run_accession)\n",
+ "\n",
+ "prjna340003_mgx <- prjna340003 %>%\n",
+ "  filter(library_source == \"METAGENOMIC\") %>%\n",
+ "  mutate(sample_name = gsub(\":DNA\", \"\", sample_alias)) %>%\n",
+ "  select(sample_name,\n",
+ "         mgx_study_accession = study_accession, mgx_run_accession = run_accession)\n",
+ "\n",
+ "prjna340003_mtx_vs_mgx <- prjna340003_mtx %>%  # keep samples found in both tables\n",
+ "  inner_join(prjna340003_mgx, by = \"sample_name\") %>%\n",
+ "  mutate(sample_type = \"ocean\")\n",
+ "\n",
+ "prjna340003_mtx_vs_mgx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fc5a0dc6",
+ "metadata": {},
+ "source": [
+ "# Combine everything together"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "id": "0aace26d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 6 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tC_2_1 | PRJNA406858 | SRR6032600 | PRJNA406858 | SRR6032602 | activated_sludge |
\n",
+ "\tB_2_1 | PRJNA406858 | SRR6032604 | PRJNA406858 | SRR6032601 | activated_sludge |
\n",
+ "\tE_2_1 | PRJNA406858 | SRR6032605 | PRJNA406858 | SRR6032603 | activated_sludge |
\n",
+ "\trumen_microbiome_of_beef_cattle_101 | PRJNA448333 | SRR8416057 | PRJNA448333 | SRR8397906 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_103 | PRJNA448333 | SRR8416058 | PRJNA448333 | SRR8404214 | cattle_rumen |
\n",
+ "\trumen_microbiome_of_beef_cattle_104 | PRJNA448333 | SRR8416055 | PRJNA448333 | SRR8397905 | cattle_rumen |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 6 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t C\\_2\\_1 & PRJNA406858 & SRR6032600 & PRJNA406858 & SRR6032602 & activated\\_sludge\\\\\n",
+ "\t B\\_2\\_1 & PRJNA406858 & SRR6032604 & PRJNA406858 & SRR6032601 & activated\\_sludge\\\\\n",
+ "\t E\\_2\\_1 & PRJNA406858 & SRR6032605 & PRJNA406858 & SRR6032603 & activated\\_sludge\\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_101 & PRJNA448333 & SRR8416057 & PRJNA448333 & SRR8397906 & cattle\\_rumen \\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_103 & PRJNA448333 & SRR8416058 & PRJNA448333 & SRR8404214 & cattle\\_rumen \\\\\n",
+ "\t rumen\\_microbiome\\_of\\_beef\\_cattle\\_104 & PRJNA448333 & SRR8416055 & PRJNA448333 & SRR8397905 & cattle\\_rumen \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 6 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| C_2_1 | PRJNA406858 | SRR6032600 | PRJNA406858 | SRR6032602 | activated_sludge |\n",
+ "| B_2_1 | PRJNA406858 | SRR6032604 | PRJNA406858 | SRR6032601 | activated_sludge |\n",
+ "| E_2_1 | PRJNA406858 | SRR6032605 | PRJNA406858 | SRR6032603 | activated_sludge |\n",
+ "| rumen_microbiome_of_beef_cattle_101 | PRJNA448333 | SRR8416057 | PRJNA448333 | SRR8397906 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_103 | PRJNA448333 | SRR8416058 | PRJNA448333 | SRR8404214 | cattle_rumen |\n",
+ "| rumen_microbiome_of_beef_cattle_104 | PRJNA448333 | SRR8416055 | PRJNA448333 | SRR8397905 | cattle_rumen |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession\n",
+ "1 C_2_1 PRJNA406858 SRR6032600 \n",
+ "2 B_2_1 PRJNA406858 SRR6032604 \n",
+ "3 E_2_1 PRJNA406858 SRR6032605 \n",
+ "4 rumen_microbiome_of_beef_cattle_101 PRJNA448333 SRR8416057 \n",
+ "5 rumen_microbiome_of_beef_cattle_103 PRJNA448333 SRR8416058 \n",
+ "6 rumen_microbiome_of_beef_cattle_104 PRJNA448333 SRR8416055 \n",
+ " mgx_study_accession mgx_run_accession sample_type \n",
+ "1 PRJNA406858 SRR6032602 activated_sludge\n",
+ "2 PRJNA406858 SRR6032601 activated_sludge\n",
+ "3 PRJNA406858 SRR6032603 activated_sludge\n",
+ "4 PRJNA448333 SRR8397906 cattle_rumen \n",
+ "5 PRJNA448333 SRR8404214 cattle_rumen \n",
+ "6 PRJNA448333 SRR8397905 cattle_rumen "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 6 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\tE2A | PRJNA340003 | SRR4342131 | PRJNA340003 | SRR4342135 | ocean |
\n",
+ "\tE4 | PRJNA340003 | SRR4342132 | PRJNA340003 | SRR4342136 | ocean |
\n",
+ "\tD1 | PRJNA340003 | SRR4342137 | PRJNA340003 | SRR4342129 | ocean |
\n",
+ "\tD2 | PRJNA340003 | SRR4342138 | PRJNA340003 | SRR4342130 | ocean |
\n",
+ "\tD3 | PRJNA340003 | SRR4342139 | PRJNA340003 | SRR4342133 | ocean |
\n",
+ "\tE2 | PRJNA340003 | SRR4342140 | PRJNA340003 | SRR4342134 | ocean |
\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 6 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\t E2A & PRJNA340003 & SRR4342131 & PRJNA340003 & SRR4342135 & ocean\\\\\n",
+ "\t E4 & PRJNA340003 & SRR4342132 & PRJNA340003 & SRR4342136 & ocean\\\\\n",
+ "\t D1 & PRJNA340003 & SRR4342137 & PRJNA340003 & SRR4342129 & ocean\\\\\n",
+ "\t D2 & PRJNA340003 & SRR4342138 & PRJNA340003 & SRR4342130 & ocean\\\\\n",
+ "\t D3 & PRJNA340003 & SRR4342139 & PRJNA340003 & SRR4342133 & ocean\\\\\n",
+ "\t E2 & PRJNA340003 & SRR4342140 & PRJNA340003 & SRR4342134 & ocean\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 6 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "| E2A | PRJNA340003 | SRR4342131 | PRJNA340003 | SRR4342135 | ocean |\n",
+ "| E4 | PRJNA340003 | SRR4342132 | PRJNA340003 | SRR4342136 | ocean |\n",
+ "| D1 | PRJNA340003 | SRR4342137 | PRJNA340003 | SRR4342129 | ocean |\n",
+ "| D2 | PRJNA340003 | SRR4342138 | PRJNA340003 | SRR4342130 | ocean |\n",
+ "| D3 | PRJNA340003 | SRR4342139 | PRJNA340003 | SRR4342133 | ocean |\n",
+ "| E2 | PRJNA340003 | SRR4342140 | PRJNA340003 | SRR4342134 | ocean |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ "1 E2A PRJNA340003 SRR4342131 PRJNA340003 \n",
+ "2 E4 PRJNA340003 SRR4342132 PRJNA340003 \n",
+ "3 D1 PRJNA340003 SRR4342137 PRJNA340003 \n",
+ "4 D2 PRJNA340003 SRR4342138 PRJNA340003 \n",
+ "5 D3 PRJNA340003 SRR4342139 PRJNA340003 \n",
+ "6 E2 PRJNA340003 SRR4342140 PRJNA340003 \n",
+ " mgx_run_accession sample_type\n",
+ "1 SRR4342135 ocean \n",
+ "2 SRR4342136 ocean \n",
+ "3 SRR4342129 ocean \n",
+ "4 SRR4342130 ocean \n",
+ "5 SRR4342133 ocean \n",
+ "6 SRR4342134 ocean "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "1280"
+ ],
+ "text/latex": [
+ "1280"
+ ],
+ "text/markdown": [
+ "1280"
+ ],
+ "text/plain": [
+ "[1] 1280"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "all_paired_mtx_mgx <- prjna406858_mtx_vs_mgx %>%  # stack every per-study table, in this order\n",
+ "  bind_rows(prjna448333_mtx_vs_mgx,\n",
+ "            prjna344005_mtx_vs_mgx,\n",
+ "            prjna453733_mtx_vs_mgx,\n",
+ "            prjna237345_vs_prjna237344,\n",
+ "            moran010B_mtx_vs_mgx,\n",
+ "            prjna603240_mtx_vs_mgx,\n",
+ "            prjna202380_mtx_vs_mgx,\n",
+ "            prjna541981_mtx_vs_mgx,\n",
+ "            prjna797778_mtx_vs_mgx,\n",
+ "            prjna339914_mtx_vs_mgx,\n",
+ "            prjeb33889_mtx_vs_mgx,\n",
+ "            prjna698464_mtx_vs_mgx,\n",
+ "            prjna396840_mtx_vs_mgx,\n",
+ "            prjna492158_mtx_vs_mgx,\n",
+ "            prjna278075_mtx_vs_mgx,\n",
+ "            prjeb38017_mtx_vs_mgx,\n",
+ "            prjna616041_mtx_vs_mgx,\n",
+ "            prjna395125_vs_prjna393770,\n",
+ "            prjeb12284_vs_prjeb12083,\n",
+ "            prjeb32788_vs_prjeb32781,\n",
+ "            prjna398089_mtx_vs_mgx,\n",
+ "            prjna632343_mtx_vs_mgx,\n",
+ "            prjna242360_mtx_vs_mgx,\n",
+ "            prjeb12234_mtx_vs_mgx,\n",
+ "            prjna340003_mtx_vs_mgx)\n",
+ "head(all_paired_mtx_mgx)  # first rows of the combined table\n",
+ "tail(all_paired_mtx_mgx)  # last rows of the combined table\n",
+ "nrow(all_paired_mtx_mgx)  # total number of MTX/MGX pairs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "id": "f6f62f8e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\n",
+ " activated_sludge bioreactor cattle_rumen \n",
+ " 3 11 48 \n",
+ "cocoa_box_fermentation deadwood groundwater \n",
+ " 3 10 2 \n",
+ " human_gut human_oral human_skin \n",
+ " 791 30 17 \n",
+ " human_vagina lake mouse_cecum \n",
+ " 180 1 8 \n",
+ " mussel_gill ocean paddy_soil \n",
+ " 17 98 6 \n",
+ " river sheep_rumen snail_gut \n",
+ " 23 20 4 \n",
+ " wastewater \n",
+ " 8 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# look at the breakdown of sample types\n",
+ "table(all_paired_mtx_mgx$sample_type)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "id": "8d49c7ee",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "1280"
+ ],
+ "text/latex": [
+ "1280"
+ ],
+ "text/markdown": [
+ "1280"
+ ],
+ "text/plain": [
+ "[1] 1280"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A tibble: 0 × 6\n",
+ "\n",
+ "\tsample_name | mtx_study_accession | mtx_run_accession | mgx_study_accession | mgx_run_accession | sample_type |
\n",
+ "\t<chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
\n",
+ "\n",
+ "\n",
+ "\n",
+ "
\n"
+ ],
+ "text/latex": [
+ "A tibble: 0 × 6\n",
+ "\\begin{tabular}{llllll}\n",
+ " sample\\_name & mtx\\_study\\_accession & mtx\\_run\\_accession & mgx\\_study\\_accession & mgx\\_run\\_accession & sample\\_type\\\\\n",
+ " & & & & & \\\\\n",
+ "\\hline\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A tibble: 0 × 6\n",
+ "\n",
+ "| sample_name <chr> | mtx_study_accession <chr> | mtx_run_accession <chr> | mgx_study_accession <chr> | mgx_run_accession <chr> | sample_type <chr> |\n",
+ "|---|---|---|---|---|---|\n",
+ "\n"
+ ],
+ "text/plain": [
+ " sample_name mtx_study_accession mtx_run_accession mgx_study_accession\n",
+ " mgx_run_accession sample_type"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# check and make sure there are no duplicated sample names, \n",
+ "# either from faulty joins or from cross-study shared sample identifiers\n",
+ "length(unique(all_paired_mtx_mgx$sample_name))\n",
+ "\n",
+ "tmp <- all_paired_mtx_mgx %>% \n",
+ " group_by(sample_name) %>%\n",
+ " tally() %>%\n",
+ " filter(n > 1)\n",
+ "\n",
+ "all_paired_mtx_mgx %>%\n",
+ " filter(sample_name %in% tmp$sample_name)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cffc574e",
+ "metadata": {},
+ "source": [
+ "## Write out tsv with results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "id": "63466e52",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "write_tsv(all_paired_mtx_mgx, \"inputs/metadata-paired-mgx-mtx.tsv\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "56293500",
+ "metadata": {},
+ "source": [
+ "## Session Info"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 76,
+ "id": "c886393d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "R version 4.1.3 (2022-03-10)\n",
+ "Platform: x86_64-apple-darwin13.4.0 (64-bit)\n",
+ "Running under: macOS Big Sur/Monterey 10.16\n",
+ "\n",
+ "Matrix products: default\n",
+ "BLAS/LAPACK: /Users/taylorreiter/miniconda3/envs/tidyjupyter/lib/libopenblasp-r0.3.21.dylib\n",
+ "\n",
+ "locale:\n",
+ "[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8\n",
+ "\n",
+ "attached base packages:\n",
+ "[1] stats graphics grDevices utils datasets methods base \n",
+ "\n",
+ "other attached packages:\n",
+ "[1] janitor_2.1.0 readr_2.1.2 dplyr_1.0.9 \n",
+ "\n",
+ "loaded via a namespace (and not attached):\n",
+ " [1] pillar_1.8.1 compiler_4.1.3 base64enc_0.1-3 tools_4.1.3 \n",
+ " [5] bit_4.0.4 digest_0.6.29 uuid_1.1-0 jsonlite_1.8.0 \n",
+ " [9] lubridate_1.8.0 evaluate_0.16 lifecycle_1.0.1 tibble_3.1.8 \n",
+ "[13] pkgconfig_2.0.3 rlang_1.0.4 IRdisplay_1.1 cli_3.3.0 \n",
+ "[17] DBI_1.1.3 curl_4.3.2 parallel_4.1.3 IRkernel_1.3 \n",
+ "[21] fastmap_1.1.0 stringr_1.4.0 repr_1.1.4 generics_0.1.3 \n",
+ "[25] vctrs_0.4.1 hms_1.1.2 bit64_4.0.5 tidyselect_1.1.2\n",
+ "[29] glue_1.6.2 snakecase_0.11.0 R6_2.5.1 fansi_1.0.3 \n",
+ "[33] vroom_1.5.7 pbdZMQ_0.3-7 purrr_0.3.4 tzdb_0.3.0 \n",
+ "[37] magrittr_2.0.3 htmltools_0.5.3 ellipsis_0.3.2 assertthat_0.2.1\n",
+ "[41] utf8_1.2.2 stringi_1.7.8 crayon_1.5.1 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "sessionInfo()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "R",
+ "language": "R",
+ "name": "ir"
+ },
+ "language_info": {
+ "codemirror_mode": "r",
+ "file_extension": ".r",
+ "mimetype": "text/x-r-source",
+ "name": "R",
+ "pygments_lexer": "r",
+ "version": "4.1.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}