diff --git a/.gitignore b/.gitignore
index de56800..7c956a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -900,3 +900,4 @@ FodyWeavers.xsd
scratch/
src/eliater/case_studies.tsv
+.idea/*
\ No newline at end of file
diff --git a/notebooks/Case_study3_The_EColi.ipynb b/notebooks/Case_study3_The_EColi.ipynb
index 96a165c..3e19149 100644
--- a/notebooks/Case_study3_The_EColi.ipynb
+++ b/notebooks/Case_study3_The_EColi.ipynb
@@ -4,10 +4,7 @@
"cell_type": "markdown",
"id": "a618fea7cca96207",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"# Case study 3: The Escherichia coli K-12 transcriptional motif"
@@ -17,10 +14,7 @@
"cell_type": "markdown",
"id": "a0cedf172ad6ad7b",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"This is case study 3 in Figure 7 in this paper: *Eliater: an open source software for causal query estimation from observational measurements of biomolecular networks. The figure below shows the transcriptional regulatory network motif of E. coli from the EcoCyc database (Keseler et al., 2021). It comprises 44 nodes, of which 7 are latent and 45 are\n",
@@ -31,10 +25,7 @@
"cell_type": "markdown",
"id": "f505e30434334788",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"![ecoli](../img/ecoli.png)"
@@ -42,72 +33,22 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 21,
"id": "2942bcdf-98ce-450c-a888-fed60f417ff1",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-26T02:29:20.417608700Z",
- "start_time": "2024-01-26T02:29:20.397731200Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-02T23:52:50.975800700Z",
+ "start_time": "2024-05-02T23:52:50.808118Z"
}
},
"outputs": [
{
"data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " key | \n",
- " value | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " eliater | \n",
- " 0.0.3-dev-28d9867e | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " y0 | \n",
- " 0.2.10-dev-8f27d998 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " Run at | \n",
- " 2024-04-25 09:07:53 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " key value\n",
- "0 eliater 0.0.3-dev-28d9867e\n",
- "1 y0 0.2.10-dev-8f27d998\n",
- "2 Run at 2024-04-25 09:07:53"
- ]
+ "text/plain": " key value\n0 eliater 0.0.3-dev-80ab64d2\n1 y0 0.2.12-dev-80ab64d2\n2 Run at 2024-05-02 19:52:50",
+ "text/html": "\n\n
\n \n \n | \n key | \n value | \n
\n \n \n \n 0 | \n eliater | \n 0.0.3-dev-80ab64d2 | \n
\n \n 1 | \n y0 | \n 0.2.12-dev-80ab64d2 | \n
\n \n 2 | \n Run at | \n 2024-05-02 19:52:50 | \n
\n \n
\n
"
},
- "execution_count": 1,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -124,8 +65,8 @@
"from eliater.data import load_ecoli_obs\n",
"from eliater.discover_latent_nodes import find_nuisance_variables, remove_nuisance_variables\n",
"from eliater.examples import ecoli_transcription_example\n",
- "from eliater.network_validation import print_graph_falsifications\n",
- "from eliater.regression import estimate_query, fit_regression, summary_statistics\n",
+ "from eliater.network_validation import print_graph_falsifications, add_ci_undirected_edges\n",
+ "from eliater.regression import estimate_query_by_linear_regression, fit_regression, summary_statistics\n",
"from y0.algorithm.identify import Identification, identify_outcomes\n",
"from y0.dsl import P, Variable\n",
"\n",
@@ -136,16 +77,13 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 22,
"id": "5132a01f-ad3f-4840-a5c9-58293d5b4991",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-18T15:48:09.459962Z",
- "start_time": "2024-01-18T15:48:09.430164900Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-02T23:52:52.272673800Z",
+ "start_time": "2024-05-02T23:52:52.265672800Z"
}
},
"outputs": [],
@@ -156,16 +94,13 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 23,
"id": "42c0955c435a1d33",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-18T15:48:11.691840700Z",
- "start_time": "2024-01-18T15:48:11.662068700Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-02T23:52:52.675380500Z",
+ "start_time": "2024-05-02T23:52:52.649716200Z"
}
},
"outputs": [],
@@ -177,10 +112,7 @@
"cell_type": "markdown",
"id": "23d671c229f26951",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"The observational data are 260 RNA-seq normalized expression profiles of E. coli K-12 MG1655 and BW25113 strains, spanning 154 distinct experimental conditions. These data were retrieved from the PRECISE database (Sastry et al., 2019)."
@@ -188,215 +120,22 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 24,
"id": "493dbce26b06565c",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-26T02:29:22.928853700Z",
- "start_time": "2024-01-26T02:29:22.883310300Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-02T23:52:53.646074800Z",
+ "start_time": "2024-05-02T23:52:53.594811900Z"
}
},
"outputs": [
{
"data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " cra | \n",
- " aceE | \n",
- " phoB | \n",
- " cyoA | \n",
- " amtB | \n",
- " appY | \n",
- " citC | \n",
- " dpiB | \n",
- " dpiA | \n",
- " fur | \n",
- " ... | \n",
- " mdh | \n",
- " fis | \n",
- " crp | \n",
- " rpoH | \n",
- " gadX | \n",
- " cspA | \n",
- " oxyR | \n",
- " soxS | \n",
- " dcuR | \n",
- " arcA | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 1 | \n",
- " 7.697991 | \n",
- " 10.651307 | \n",
- " 5.599989 | \n",
- " 11.254009 | \n",
- " 5.128441 | \n",
- " 5.919468 | \n",
- " 0.000000 | \n",
- " 4.323397 | \n",
- " 4.761679 | \n",
- " 9.737595 | \n",
- " ... | \n",
- " 11.745289 | \n",
- " 8.936196 | \n",
- " 9.666416 | \n",
- " 8.725102 | \n",
- " 8.614542 | \n",
- " 7.743336 | \n",
- " 8.209081 | \n",
- " 6.230069 | \n",
- " 5.752131 | \n",
- " 9.547993 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 7.581105 | \n",
- " 10.761484 | \n",
- " 5.746619 | \n",
- " 11.431429 | \n",
- " 5.614179 | \n",
- " 5.628823 | \n",
- " 0.000000 | \n",
- " 4.706430 | \n",
- " 4.930888 | \n",
- " 9.784892 | \n",
- " ... | \n",
- " 11.843169 | \n",
- " 8.601549 | \n",
- " 9.621358 | \n",
- " 8.769579 | \n",
- " 8.456629 | \n",
- " 7.474558 | \n",
- " 8.417561 | \n",
- " 6.104497 | \n",
- " 5.667736 | \n",
- " 9.560289 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 6.395808 | \n",
- " 12.109947 | \n",
- " 6.009650 | \n",
- " 9.937863 | \n",
- " 3.866721 | \n",
- " 5.789442 | \n",
- " 0.000000 | \n",
- " 4.776288 | \n",
- " 5.291245 | \n",
- " 9.381882 | \n",
- " ... | \n",
- " 11.166194 | \n",
- " 7.013285 | \n",
- " 9.761605 | \n",
- " 9.649581 | \n",
- " 8.391347 | \n",
- " 3.629759 | \n",
- " 7.841489 | \n",
- " 8.009047 | \n",
- " 5.725878 | \n",
- " 10.737478 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 6.366865 | \n",
- " 11.992996 | \n",
- " 6.131477 | \n",
- " 9.838573 | \n",
- " 3.666917 | \n",
- " 5.496674 | \n",
- " 0.377771 | \n",
- " 4.659630 | \n",
- " 5.366084 | \n",
- " 9.468696 | \n",
- " ... | \n",
- " 11.030617 | \n",
- " 7.078497 | \n",
- " 9.706964 | \n",
- " 9.746059 | \n",
- " 8.180204 | \n",
- " 4.538549 | \n",
- " 7.754378 | \n",
- " 8.172509 | \n",
- " 5.470773 | \n",
- " 10.788613 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 7.315204 | \n",
- " 10.569824 | \n",
- " 5.158324 | \n",
- " 11.272986 | \n",
- " 6.146377 | \n",
- " 5.275278 | \n",
- " 0.190837 | \n",
- " 4.332369 | \n",
- " 4.786446 | \n",
- " 9.779253 | \n",
- " ... | \n",
- " 11.423517 | \n",
- " 9.878379 | \n",
- " 9.692445 | \n",
- " 8.226875 | \n",
- " 8.142990 | \n",
- " 7.147757 | \n",
- " 7.979421 | \n",
- " 6.161353 | \n",
- " 5.738662 | \n",
- " 9.470079 | \n",
- "
\n",
- " \n",
- "
\n",
- "
5 rows × 44 columns
\n",
- "
"
- ],
- "text/plain": [
- " cra aceE phoB cyoA amtB appY citC \\\n",
- "1 7.697991 10.651307 5.599989 11.254009 5.128441 5.919468 0.000000 \n",
- "2 7.581105 10.761484 5.746619 11.431429 5.614179 5.628823 0.000000 \n",
- "3 6.395808 12.109947 6.009650 9.937863 3.866721 5.789442 0.000000 \n",
- "4 6.366865 11.992996 6.131477 9.838573 3.666917 5.496674 0.377771 \n",
- "5 7.315204 10.569824 5.158324 11.272986 6.146377 5.275278 0.190837 \n",
- "\n",
- " dpiB dpiA fur ... mdh fis crp rpoH \\\n",
- "1 4.323397 4.761679 9.737595 ... 11.745289 8.936196 9.666416 8.725102 \n",
- "2 4.706430 4.930888 9.784892 ... 11.843169 8.601549 9.621358 8.769579 \n",
- "3 4.776288 5.291245 9.381882 ... 11.166194 7.013285 9.761605 9.649581 \n",
- "4 4.659630 5.366084 9.468696 ... 11.030617 7.078497 9.706964 9.746059 \n",
- "5 4.332369 4.786446 9.779253 ... 11.423517 9.878379 9.692445 8.226875 \n",
- "\n",
- " gadX cspA oxyR soxS dcuR arcA \n",
- "1 8.614542 7.743336 8.209081 6.230069 5.752131 9.547993 \n",
- "2 8.456629 7.474558 8.417561 6.104497 5.667736 9.560289 \n",
- "3 8.391347 3.629759 7.841489 8.009047 5.725878 10.737478 \n",
- "4 8.180204 4.538549 7.754378 8.172509 5.470773 10.788613 \n",
- "5 8.142990 7.147757 7.979421 6.161353 5.738662 9.470079 \n",
- "\n",
- "[5 rows x 44 columns]"
- ]
+ "text/plain": " cra aceE phoB cyoA amtB appY citC \\\n1 7.697991 10.651307 5.599989 11.254009 5.128441 5.919468 0.000000 \n2 7.581105 10.761484 5.746619 11.431429 5.614179 5.628823 0.000000 \n3 6.395808 12.109947 6.009650 9.937863 3.866721 5.789442 0.000000 \n4 6.366865 11.992996 6.131477 9.838573 3.666917 5.496674 0.377771 \n5 7.315204 10.569824 5.158324 11.272986 6.146377 5.275278 0.190837 \n\n dpiB dpiA fur ... mdh fis crp rpoH \\\n1 4.323397 4.761679 9.737595 ... 11.745289 8.936196 9.666416 8.725102 \n2 4.706430 4.930888 9.784892 ... 11.843169 8.601549 9.621358 8.769579 \n3 4.776288 5.291245 9.381882 ... 11.166194 7.013285 9.761605 9.649581 \n4 4.659630 5.366084 9.468696 ... 11.030617 7.078497 9.706964 9.746059 \n5 4.332369 4.786446 9.779253 ... 11.423517 9.878379 9.692445 8.226875 \n\n gadX cspA oxyR soxS dcuR arcA \n1 8.614542 7.743336 8.209081 6.230069 5.752131 9.547993 \n2 8.456629 7.474558 8.417561 6.104497 5.667736 9.560289 \n3 8.391347 3.629759 7.841489 8.009047 5.725878 10.737478 \n4 8.180204 4.538549 7.754378 8.172509 5.470773 10.788613 \n5 8.142990 7.147757 7.979421 6.161353 5.738662 9.470079 \n\n[5 rows x 44 columns]",
+ "text/html": "\n\n
\n \n \n | \n cra | \n aceE | \n phoB | \n cyoA | \n amtB | \n appY | \n citC | \n dpiB | \n dpiA | \n fur | \n ... | \n mdh | \n fis | \n crp | \n rpoH | \n gadX | \n cspA | \n oxyR | \n soxS | \n dcuR | \n arcA | \n
\n \n \n \n 1 | \n 7.697991 | \n 10.651307 | \n 5.599989 | \n 11.254009 | \n 5.128441 | \n 5.919468 | \n 0.000000 | \n 4.323397 | \n 4.761679 | \n 9.737595 | \n ... | \n 11.745289 | \n 8.936196 | \n 9.666416 | \n 8.725102 | \n 8.614542 | \n 7.743336 | \n 8.209081 | \n 6.230069 | \n 5.752131 | \n 9.547993 | \n
\n \n 2 | \n 7.581105 | \n 10.761484 | \n 5.746619 | \n 11.431429 | \n 5.614179 | \n 5.628823 | \n 0.000000 | \n 4.706430 | \n 4.930888 | \n 9.784892 | \n ... | \n 11.843169 | \n 8.601549 | \n 9.621358 | \n 8.769579 | \n 8.456629 | \n 7.474558 | \n 8.417561 | \n 6.104497 | \n 5.667736 | \n 9.560289 | \n
\n \n 3 | \n 6.395808 | \n 12.109947 | \n 6.009650 | \n 9.937863 | \n 3.866721 | \n 5.789442 | \n 0.000000 | \n 4.776288 | \n 5.291245 | \n 9.381882 | \n ... | \n 11.166194 | \n 7.013285 | \n 9.761605 | \n 9.649581 | \n 8.391347 | \n 3.629759 | \n 7.841489 | \n 8.009047 | \n 5.725878 | \n 10.737478 | \n
\n \n 4 | \n 6.366865 | \n 11.992996 | \n 6.131477 | \n 9.838573 | \n 3.666917 | \n 5.496674 | \n 0.377771 | \n 4.659630 | \n 5.366084 | \n 9.468696 | \n ... | \n 11.030617 | \n 7.078497 | \n 9.706964 | \n 9.746059 | \n 8.180204 | \n 4.538549 | \n 7.754378 | \n 8.172509 | \n 5.470773 | \n 10.788613 | \n
\n \n 5 | \n 7.315204 | \n 10.569824 | \n 5.158324 | \n 11.272986 | \n 6.146377 | \n 5.275278 | \n 0.190837 | \n 4.332369 | \n 4.786446 | \n 9.779253 | \n ... | \n 11.423517 | \n 9.878379 | \n 9.692445 | \n 8.226875 | \n 8.142990 | \n 7.147757 | \n 7.979421 | \n 6.161353 | \n 5.738662 | \n 9.470079 | \n
\n \n
\n
5 rows × 44 columns
\n
"
},
- "execution_count": 4,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -410,10 +149,7 @@
"cell_type": "markdown",
"id": "3cb336afa4c23502",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"## Step 1: Verify correctness of the network structure"
@@ -423,10 +159,7 @@
"cell_type": "markdown",
"id": "dd9954532ad35474",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"The total number of d-separations implied by this network is large. It takes several hours to find all of them and test them against data. On the other hand, High-dimensional conditional independence tests can be very unreliable. Hence, it is best to control number of variables to condition upon. Here we selected 3. However, the default in this function is 5. Even with max_given equal to 3, it takes around an hour to produce the results."
@@ -434,43 +167,36 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 25,
"id": "946fd227c4da088d",
"metadata": {
- "ExecuteTime": {
- "start_time": "2024-01-18T16:03:06.113484700Z"
- },
"collapsed": false,
- "is_executing": true,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-03T00:12:23.956336800Z",
+ "start_time": "2024-05-02T23:52:55.566431900Z"
}
},
"outputs": [
{
"data": {
+ "text/plain": "Checking d-separations: 0%| | 0/946 [00:00, ?pair/s]",
"application/vnd.jupyter.widget-view+json": {
- "model_id": "202871c1577b4e66b740661b9fe7ded6",
"version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "Checking d-separations: 0%| | 0/946 [00:00, ?pair/s]"
- ]
+ "version_minor": 0,
+ "model_id": "9eab715cf1c34de4967fb0920c01b659"
+ }
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
+ "text/plain": "Checking conditionals: 0%| | 0/498 [00:00, ?it/s]",
"application/vnd.jupyter.widget-view+json": {
- "model_id": "9222fa05751a4948867841688e181838",
"version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "Checking conditionals: 0%| | 0/498 [00:00, ?it/s]"
- ]
+ "version_minor": 0,
+ "model_id": "3de1fa681c7243e0bebef0cbc2848373"
+ }
},
"metadata": {},
"output_type": "display_data"
@@ -479,203 +205,19 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Of the 498 d-separations implied by the network's structure, 202 (40.56%) rejected the null hypothesis at p<0.01.\n",
+ "Of the 498 d-separations implied by the network's structure, 201 (40.36%) rejected the null hypothesis at p<0.01.\n",
"\n",
"Since this is more than 30%, Eliater considers this a major inconsistency and therefore suggests adding appropriate bidirected edges using the eliater.add_ci_undirected_edges() function.\n",
"\n",
- "Finished in 293.75 seconds.\n",
- "\n"
+ "Finished in 1168.38 seconds.\n"
]
},
{
"data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " left | \n",
- " right | \n",
- " given | \n",
- " stats | \n",
- " p | \n",
- " dof | \n",
- " p_adj | \n",
- " p_adj_significant | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " appA | \n",
- " appB | \n",
- " appY | \n",
- " 0.899419 | \n",
- " 5.350841e-94 | \n",
- " None | \n",
- " 2.664719e-91 | \n",
- " True | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " appA | \n",
- " phoB | \n",
- " appY | \n",
- " 0.840764 | \n",
- " 3.572616e-70 | \n",
- " None | \n",
- " 1.775590e-67 | \n",
- " True | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " appA | \n",
- " narL | \n",
- " appY | \n",
- " 0.838726 | \n",
- " 1.583492e-69 | \n",
- " None | \n",
- " 7.854120e-67 | \n",
- " True | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " appA | \n",
- " arcA | \n",
- " appY | \n",
- " 0.833926 | \n",
- " 4.864309e-68 | \n",
- " None | \n",
- " 2.407833e-65 | \n",
- " True | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " appA | \n",
- " rpoS | \n",
- " appY | \n",
- " 0.832139 | \n",
- " 1.693584e-67 | \n",
- " None | \n",
- " 8.366305e-65 | \n",
- " True | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 493 | \n",
- " cyoA | \n",
- " exuT | \n",
- " crp|dpiA | \n",
- " -0.011299 | \n",
- " 8.566703e-01 | \n",
- " None | \n",
- " 1.000000e+00 | \n",
- " False | \n",
- "
\n",
- " \n",
- " 494 | \n",
- " dcuR | \n",
- " rpoH | \n",
- " | \n",
- " 0.102291 | \n",
- " 1.011384e-01 | \n",
- " None | \n",
- " 1.000000e+00 | \n",
- " False | \n",
- "
\n",
- " \n",
- " 495 | \n",
- " cspA | \n",
- " exuT | \n",
- " | \n",
- " -0.142326 | \n",
- " 2.221632e-02 | \n",
- " None | \n",
- " 1.000000e+00 | \n",
- " False | \n",
- "
\n",
- " \n",
- " 496 | \n",
- " btsR | \n",
- " rpoS | \n",
- " | \n",
- " -0.048802 | \n",
- " 4.350762e-01 | \n",
- " None | \n",
- " 1.000000e+00 | \n",
- " False | \n",
- "
\n",
- " \n",
- " 497 | \n",
- " hns | \n",
- " iscR | \n",
- " | \n",
- " -0.027520 | \n",
- " 6.599513e-01 | \n",
- " None | \n",
- " 1.000000e+00 | \n",
- " False | \n",
- "
\n",
- " \n",
- "
\n",
- "
498 rows × 8 columns
\n",
- "
"
- ],
- "text/plain": [
- " left right given stats p dof p_adj \\\n",
- "0 appA appB appY 0.899419 5.350841e-94 None 2.664719e-91 \n",
- "1 appA phoB appY 0.840764 3.572616e-70 None 1.775590e-67 \n",
- "2 appA narL appY 0.838726 1.583492e-69 None 7.854120e-67 \n",
- "3 appA arcA appY 0.833926 4.864309e-68 None 2.407833e-65 \n",
- "4 appA rpoS appY 0.832139 1.693584e-67 None 8.366305e-65 \n",
- ".. ... ... ... ... ... ... ... \n",
- "493 cyoA exuT crp|dpiA -0.011299 8.566703e-01 None 1.000000e+00 \n",
- "494 dcuR rpoH 0.102291 1.011384e-01 None 1.000000e+00 \n",
- "495 cspA exuT -0.142326 2.221632e-02 None 1.000000e+00 \n",
- "496 btsR rpoS -0.048802 4.350762e-01 None 1.000000e+00 \n",
- "497 hns iscR -0.027520 6.599513e-01 None 1.000000e+00 \n",
- "\n",
- " p_adj_significant \n",
- "0 True \n",
- "1 True \n",
- "2 True \n",
- "3 True \n",
- "4 True \n",
- ".. ... \n",
- "493 False \n",
- "494 False \n",
- "495 False \n",
- "496 False \n",
- "497 False \n",
- "\n",
- "[498 rows x 8 columns]"
- ]
+ "text/plain": " left right given stats p dof p_adj \\\n0 appA appB appY 0.899419 5.350841e-94 None 2.664719e-91 \n1 appA phoB appY 0.840764 3.572616e-70 None 1.775590e-67 \n2 appA narL appY 0.838726 1.583492e-69 None 7.854120e-67 \n3 appA arcA appY 0.833926 4.864309e-68 None 2.407833e-65 \n4 appA rpoS appY 0.832139 1.693584e-67 None 8.366305e-65 \n.. ... ... ... ... ... ... ... \n493 gutM modE 0.137754 2.693370e-02 None 1.000000e+00 \n494 cra narL phoB 0.031518 6.143216e-01 None 1.000000e+00 \n495 gutM lrp crp|hns 0.099114 1.122438e-01 None 1.000000e+00 \n496 citC narP 0.093459 1.343524e-01 None 1.000000e+00 \n497 btsR ydeO -0.103919 9.579462e-02 None 1.000000e+00 \n\n p_adj_significant \n0 True \n1 True \n2 True \n3 True \n4 True \n.. ... \n493 False \n494 False \n495 False \n496 False \n497 False \n\n[498 rows x 8 columns]",
+ "text/html": "\n\n
\n \n \n | \n left | \n right | \n given | \n stats | \n p | \n dof | \n p_adj | \n p_adj_significant | \n
\n \n \n \n 0 | \n appA | \n appB | \n appY | \n 0.899419 | \n 5.350841e-94 | \n None | \n 2.664719e-91 | \n True | \n
\n \n 1 | \n appA | \n phoB | \n appY | \n 0.840764 | \n 3.572616e-70 | \n None | \n 1.775590e-67 | \n True | \n
\n \n 2 | \n appA | \n narL | \n appY | \n 0.838726 | \n 1.583492e-69 | \n None | \n 7.854120e-67 | \n True | \n
\n \n 3 | \n appA | \n arcA | \n appY | \n 0.833926 | \n 4.864309e-68 | \n None | \n 2.407833e-65 | \n True | \n
\n \n 4 | \n appA | \n rpoS | \n appY | \n 0.832139 | \n 1.693584e-67 | \n None | \n 8.366305e-65 | \n True | \n
\n \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n 493 | \n gutM | \n modE | \n | \n 0.137754 | \n 2.693370e-02 | \n None | \n 1.000000e+00 | \n False | \n
\n \n 494 | \n cra | \n narL | \n phoB | \n 0.031518 | \n 6.143216e-01 | \n None | \n 1.000000e+00 | \n False | \n
\n \n 495 | \n gutM | \n lrp | \n crp|hns | \n 0.099114 | \n 1.122438e-01 | \n None | \n 1.000000e+00 | \n False | \n
\n \n 496 | \n citC | \n narP | \n | \n 0.093459 | \n 1.343524e-01 | \n None | \n 1.000000e+00 | \n False | \n
\n \n 497 | \n btsR | \n ydeO | \n | \n -0.103919 | \n 9.579462e-02 | \n None | \n 1.000000e+00 | \n False | \n
\n \n
\n
498 rows × 8 columns
\n
"
},
- "execution_count": 5,
+ "execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@@ -693,43 +235,55 @@
]
},
{
- "cell_type": "markdown",
- "id": "de84acea8401aa82",
+ "cell_type": "code",
+ "outputs": [],
+ "source": [
+ "graph = add_ci_undirected_edges(\n",
+ " graph=graph,\n",
+ " data=data,\n",
+ " method=\"pearson\",\n",
+ " significance_level=0.01,\n",
+ " max_conditions=3\n",
+ ")"
+ ],
"metadata": {
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-03T00:38:15.077600500Z",
+ "start_time": "2024-05-03T00:12:23.956336800Z"
}
},
+ "id": "271997c47da05e0",
+ "execution_count": 26
+ },
+ {
+ "cell_type": "markdown",
+ "id": "de84acea8401aa82",
+ "metadata": {
+ "collapsed": false
+ },
"source": [
"## Step 2: Check query identifiability"
]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 27,
"id": "9db1518afcea7377",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-18T15:49:06.598133800Z",
- "start_time": "2024-01-18T15:49:06.565276800Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-03T00:38:15.145603100Z",
+ "start_time": "2024-05-03T00:38:15.078674Z"
}
},
"outputs": [
{
"data": {
- "text/latex": [
- "$\\sum\\limits_{arcA, crp, dcuR, fnr, ihfA, ihfB, modE, narL, rpoD, rpoS} P(arcA | rpoD) P(crp | arcA, ihfA, rpoD, rpoS) P(dcuR | arcA, crp, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoD, rpoS, soxS) P(dpiA | arcA, crp, dcuR, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoD, rpoS, soxS) P(fnr | arcA, crp, fur, ihfA, ihfB, lrp, oxyR, rpoD, rpoS, soxS) P(ihfA | arcA, rpoD, rpoS) P(ihfB | arcA, ihfA, rpoD, rpoS) P(narL | arcA, crp, fnr, fur, ihfA, ihfB, lrp, modE, oxyR, rpoD, rpoS, soxS) P(rpoS | arcA, rpoD) \\sum\\limits_{arcA, crp, dcuR, dpiA, fnr, fur, ihfA, ihfB, lrp, narL, oxyR, rpoD, rpoS, soxS} \\sum\\limits_{aceE, amtB, appA, appB, appY, aspC, btsR, cirA, citC, cra, cspA, cydD, cyoA, dpiB, exuT, fis, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, iscR, mdh, narP, phoB, rpoH, ydeO} P(aceE, amtB, appA, appB, appY, arcA, aspC, btsR, cirA, citC, cra, crp, cspA, cydD, cyoA, dcuR, dpiA, dpiB, exuT, fis, fnr, fur, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, ihfA, ihfB, iscR, lrp, mdh, modE, narL, narP, oxyR, phoB, rpoD, rpoH, rpoS, soxS, ydeO) \\sum\\limits_{arcA, crp, dcuR, dpiA, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoS, soxS} \\sum\\limits_{aceE, amtB, appA, appB, appY, aspC, btsR, cirA, citC, cra, cspA, cydD, cyoA, dpiB, exuT, fis, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, iscR, mdh, narP, phoB, rpoH, ydeO} P(aceE, amtB, appA, appB, appY, arcA, aspC, btsR, cirA, citC, cra, crp, cspA, cydD, cyoA, dcuR, dpiA, dpiB, exuT, fis, fnr, fur, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, ihfA, ihfB, iscR, lrp, mdh, modE, narL, narP, oxyR, phoB, rpoD, rpoH, rpoS, soxS, ydeO)$"
- ],
- "text/plain": [
- "Sum[arcA, crp, dcuR, fnr, ihfA, ihfB, modE, narL, rpoD, rpoS](P(arcA | rpoD) * P(crp | arcA, ihfA, rpoD, rpoS) * P(dcuR | arcA, crp, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoD, rpoS, soxS) * P(dpiA | arcA, crp, dcuR, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoD, rpoS, soxS) * P(fnr | arcA, crp, fur, ihfA, ihfB, lrp, oxyR, rpoD, rpoS, soxS) * P(ihfA | arcA, rpoD, rpoS) * P(ihfB | arcA, ihfA, rpoD, rpoS) * P(narL | arcA, crp, fnr, fur, ihfA, ihfB, lrp, modE, oxyR, rpoD, rpoS, soxS) * P(rpoS | arcA, rpoD) * Sum[arcA, crp, dcuR, dpiA, fnr, fur, ihfA, ihfB, lrp, narL, oxyR, rpoD, rpoS, soxS](Sum[aceE, amtB, appA, appB, appY, aspC, btsR, cirA, citC, cra, cspA, cydD, cyoA, dpiB, exuT, fis, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, iscR, mdh, narP, phoB, rpoH, ydeO](P(aceE, amtB, appA, appB, appY, arcA, aspC, btsR, cirA, citC, cra, crp, cspA, cydD, cyoA, dcuR, dpiA, dpiB, exuT, fis, fnr, fur, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, ihfA, ihfB, iscR, lrp, mdh, modE, narL, narP, oxyR, phoB, rpoD, rpoH, rpoS, soxS, ydeO))) * Sum[arcA, crp, dcuR, dpiA, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoS, soxS](Sum[aceE, amtB, appA, appB, appY, aspC, btsR, cirA, citC, cra, cspA, cydD, cyoA, dpiB, exuT, fis, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, iscR, mdh, narP, phoB, rpoH, ydeO](P(aceE, amtB, appA, appB, appY, arcA, aspC, btsR, cirA, citC, cra, crp, cspA, cydD, cyoA, dcuR, dpiA, dpiB, exuT, fis, fnr, fur, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, ihfA, ihfB, iscR, lrp, mdh, modE, narL, narP, oxyR, phoB, rpoD, rpoH, rpoS, soxS, ydeO))))"
- ]
+ "text/plain": "Sum[arcA, crp, dcuR, fnr, ihfA, ihfB, modE, narL, rpoD, rpoS](P(dcuR | arcA, crp, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoD, rpoS, soxS) * P(dpiA | arcA, crp, dcuR, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoD, rpoS, soxS) * Sum[arcA, crp, dcuR, dpiA, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoS, soxS](Sum[aceE, amtB, appA, appB, appY, aspC, btsR, cirA, citC, cra, cspA, cydD, cyoA, dpiB, exuT, fis, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, iscR, mdh, narP, phoB, rpoH, ydeO](P(aceE, amtB, appA, appB, appY, arcA, aspC, btsR, cirA, citC, cra, crp, cspA, cydD, cyoA, dcuR, dpiA, dpiB, exuT, fis, fnr, fur, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, ihfA, ihfB, iscR, lrp, mdh, modE, narL, narP, oxyR, phoB, rpoD, rpoH, rpoS, soxS, ydeO))) * Sum[lrp, oxyR, soxS](P(arcA | modE, rpoD) * P(crp | arcA, ihfA, lrp, modE, rpoD, rpoS, soxS) * P(fnr | arcA, crp, fur, ihfA, ihfB, lrp, modE, oxyR, rpoD, rpoS, soxS) * P(ihfA | arcA, lrp, modE, rpoD, rpoS) * P(ihfB | arcA, crp, ihfA, lrp, modE, rpoD, rpoS, soxS) * P(lrp | arcA, modE, rpoD, rpoS) * P(modE | rpoD) * P(narL | arcA, crp, fnr, fur, ihfA, ihfB, lrp, modE, oxyR, rpoD, rpoS, soxS) * P(oxyR | arcA, crp, ihfA, ihfB, lrp, modE, rpoD, rpoS, soxS) * P(rpoS | arcA, modE, rpoD) * P(soxS | arcA, ihfA, lrp, modE, rpoD, rpoS)))",
+ "text/latex": "$\\sum\\limits_{arcA, crp, dcuR, fnr, ihfA, ihfB, modE, narL, rpoD, rpoS} P(dcuR | arcA, crp, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoD, rpoS, soxS) P(dpiA | arcA, crp, dcuR, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoD, rpoS, soxS) \\sum\\limits_{arcA, crp, dcuR, dpiA, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoS, soxS} \\sum\\limits_{aceE, amtB, appA, appB, appY, aspC, btsR, cirA, citC, cra, cspA, cydD, cyoA, dpiB, exuT, fis, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, iscR, mdh, narP, phoB, rpoH, ydeO} P(aceE, amtB, appA, appB, appY, arcA, aspC, btsR, cirA, citC, cra, crp, cspA, cydD, cyoA, dcuR, dpiA, dpiB, exuT, fis, fnr, fur, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, ihfA, ihfB, iscR, lrp, mdh, modE, narL, narP, oxyR, phoB, rpoD, rpoH, rpoS, soxS, ydeO) \\sum\\limits_{lrp, oxyR, soxS} P(arcA | modE, rpoD) P(crp | arcA, ihfA, lrp, modE, rpoD, rpoS, soxS) P(fnr | arcA, crp, fur, ihfA, ihfB, lrp, modE, oxyR, rpoD, rpoS, soxS) P(ihfA | arcA, lrp, modE, rpoD, rpoS) P(ihfB | arcA, crp, ihfA, lrp, modE, rpoD, rpoS, soxS) P(lrp | arcA, modE, rpoD, rpoS) P(modE | rpoD) P(narL | arcA, crp, fnr, fur, ihfA, ihfB, lrp, modE, oxyR, rpoD, rpoS, soxS) P(oxyR | arcA, crp, ihfA, ihfB, lrp, modE, rpoD, rpoS, soxS) P(rpoS | arcA, modE, rpoD) P(soxS | arcA, ihfA, lrp, modE, rpoD, rpoS)$"
},
- "execution_count": 6,
+ "execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
@@ -742,10 +296,7 @@
"cell_type": "markdown",
"id": "4abeb139d356f07c",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"The query is identifiable. Hence, we can proceed to the next step."
@@ -755,10 +306,7 @@
"cell_type": "markdown",
"id": "c61266793a90058b",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"## Step 3: Find nuisance variables and mark them as latent"
@@ -768,10 +316,7 @@
"cell_type": "markdown",
"id": "55c4bfe949593a4a",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"This function finds the nuisance variables for the input graph."
@@ -779,16 +324,13 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 28,
"id": "80a72ccd9a8ce726",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-18T15:49:09.920296900Z",
- "start_time": "2024-01-18T15:49:09.890834200Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-03T00:38:15.339071700Z",
+ "start_time": "2024-05-03T00:38:15.147371500Z"
}
},
"outputs": [
@@ -801,31 +343,9 @@
},
{
"data": {
- "text/plain": [
- "[aceE,\n",
- " amtB,\n",
- " appA,\n",
- " appB,\n",
- " appY,\n",
- " aspC,\n",
- " cirA,\n",
- " citC,\n",
- " cydD,\n",
- " cyoA,\n",
- " dpiB,\n",
- " exuT,\n",
- " gadX,\n",
- " gutM,\n",
- " hcp,\n",
- " hns,\n",
- " hyaA,\n",
- " hyaB,\n",
- " hyaF,\n",
- " mdh,\n",
- " ydeO]"
- ]
+ "text/plain": "[aceE,\n amtB,\n appA,\n appB,\n appY,\n aspC,\n cirA,\n citC,\n cydD,\n cyoA,\n dpiB,\n exuT,\n gadX,\n gutM,\n hcp,\n hns,\n hyaA,\n hyaB,\n hyaF,\n mdh,\n ydeO]"
},
- "execution_count": 7,
+ "execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
@@ -840,10 +360,7 @@
"cell_type": "markdown",
"id": "a10322ec91a50588",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"## Step 4: Simplify the network"
@@ -853,10 +370,7 @@
"cell_type": "markdown",
"id": "62b5c7bd4d5db516",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"The following function find the nuisance variable (step 3), marks them as latent and then applies Evan's simplification rules to remove the nuisance variables."
@@ -864,2713 +378,20 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 29,
"id": "213d620224db3470",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-18T15:49:14.496076400Z",
- "start_time": "2024-01-18T15:49:14.452122800Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-03T00:38:18.348030500Z",
+ "start_time": "2024-05-03T00:38:15.180664900Z"
}
},
"outputs": [
{
"data": {
- "image/svg+xml": [
- "\n",
- "\n",
- "\n"
- ],
- "text/plain": [
- "