diff --git a/.gitignore b/.gitignore
index de56800..7c956a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -900,3 +900,4 @@ FodyWeavers.xsd
scratch/
src/eliater/case_studies.tsv
+.idea/*
\ No newline at end of file
diff --git a/notebooks/Case_study3_The_EColi.ipynb b/notebooks/Case_study3_The_EColi.ipynb
index 96a165c..3e19149 100644
--- a/notebooks/Case_study3_The_EColi.ipynb
+++ b/notebooks/Case_study3_The_EColi.ipynb
@@ -4,10 +4,7 @@
"cell_type": "markdown",
"id": "a618fea7cca96207",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"# Case study 3: The Escherichia coli K-12 transcriptional motif"
@@ -17,10 +14,7 @@
"cell_type": "markdown",
"id": "a0cedf172ad6ad7b",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
     "This is case study 3 in Figure 7 in this paper: *Eliater: an open source software for causal query estimation from observational measurements of biomolecular networks*. The figure below shows the transcriptional regulatory network motif of E. coli from the EcoCyc database (Keseler et al., 2021). It comprises 44 nodes, of which 7 are latent and 45 are\n",
@@ -31,10 +25,7 @@
"cell_type": "markdown",
"id": "f505e30434334788",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"data:image/s3,"s3://crabby-images/7dc45/7dc456325136e9fe55c60c5edf96253c3f9942d8" alt="ecoli""
@@ -42,72 +33,22 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 21,
"id": "2942bcdf-98ce-450c-a888-fed60f417ff1",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-26T02:29:20.417608700Z",
- "start_time": "2024-01-26T02:29:20.397731200Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-02T23:52:50.975800700Z",
+ "start_time": "2024-05-02T23:52:50.808118Z"
}
},
"outputs": [
{
"data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " key | \n",
- " value | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " eliater | \n",
- " 0.0.3-dev-28d9867e | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " y0 | \n",
- " 0.2.10-dev-8f27d998 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " Run at | \n",
- " 2024-04-25 09:07:53 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " key value\n",
- "0 eliater 0.0.3-dev-28d9867e\n",
- "1 y0 0.2.10-dev-8f27d998\n",
- "2 Run at 2024-04-25 09:07:53"
- ]
+ "text/plain": " key value\n0 eliater 0.0.3-dev-80ab64d2\n1 y0 0.2.12-dev-80ab64d2\n2 Run at 2024-05-02 19:52:50",
+ "text/html": "\n\n
\n \n \n | \n key | \n value | \n
\n \n \n \n 0 | \n eliater | \n 0.0.3-dev-80ab64d2 | \n
\n \n 1 | \n y0 | \n 0.2.12-dev-80ab64d2 | \n
\n \n 2 | \n Run at | \n 2024-05-02 19:52:50 | \n
\n \n
\n
"
},
- "execution_count": 1,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -124,8 +65,8 @@
"from eliater.data import load_ecoli_obs\n",
"from eliater.discover_latent_nodes import find_nuisance_variables, remove_nuisance_variables\n",
"from eliater.examples import ecoli_transcription_example\n",
- "from eliater.network_validation import print_graph_falsifications\n",
- "from eliater.regression import estimate_query, fit_regression, summary_statistics\n",
+ "from eliater.network_validation import print_graph_falsifications, add_ci_undirected_edges\n",
+ "from eliater.regression import estimate_query_by_linear_regression, fit_regression, summary_statistics\n",
"from y0.algorithm.identify import Identification, identify_outcomes\n",
"from y0.dsl import P, Variable\n",
"\n",
@@ -136,16 +77,13 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 22,
"id": "5132a01f-ad3f-4840-a5c9-58293d5b4991",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-18T15:48:09.459962Z",
- "start_time": "2024-01-18T15:48:09.430164900Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-02T23:52:52.272673800Z",
+ "start_time": "2024-05-02T23:52:52.265672800Z"
}
},
"outputs": [],
@@ -156,16 +94,13 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 23,
"id": "42c0955c435a1d33",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-18T15:48:11.691840700Z",
- "start_time": "2024-01-18T15:48:11.662068700Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-02T23:52:52.675380500Z",
+ "start_time": "2024-05-02T23:52:52.649716200Z"
}
},
"outputs": [],
@@ -177,10 +112,7 @@
"cell_type": "markdown",
"id": "23d671c229f26951",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"The observational data are 260 RNA-seq normalized expression profiles of E. coli K-12 MG1655 and BW25113 strains, spanning 154 distinct experimental conditions. These data were retrieved from the PRECISE database (Sastry et al., 2019)."
@@ -188,215 +120,22 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 24,
"id": "493dbce26b06565c",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-26T02:29:22.928853700Z",
- "start_time": "2024-01-26T02:29:22.883310300Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-02T23:52:53.646074800Z",
+ "start_time": "2024-05-02T23:52:53.594811900Z"
}
},
"outputs": [
{
"data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " cra | \n",
- " aceE | \n",
- " phoB | \n",
- " cyoA | \n",
- " amtB | \n",
- " appY | \n",
- " citC | \n",
- " dpiB | \n",
- " dpiA | \n",
- " fur | \n",
- " ... | \n",
- " mdh | \n",
- " fis | \n",
- " crp | \n",
- " rpoH | \n",
- " gadX | \n",
- " cspA | \n",
- " oxyR | \n",
- " soxS | \n",
- " dcuR | \n",
- " arcA | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 1 | \n",
- " 7.697991 | \n",
- " 10.651307 | \n",
- " 5.599989 | \n",
- " 11.254009 | \n",
- " 5.128441 | \n",
- " 5.919468 | \n",
- " 0.000000 | \n",
- " 4.323397 | \n",
- " 4.761679 | \n",
- " 9.737595 | \n",
- " ... | \n",
- " 11.745289 | \n",
- " 8.936196 | \n",
- " 9.666416 | \n",
- " 8.725102 | \n",
- " 8.614542 | \n",
- " 7.743336 | \n",
- " 8.209081 | \n",
- " 6.230069 | \n",
- " 5.752131 | \n",
- " 9.547993 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 7.581105 | \n",
- " 10.761484 | \n",
- " 5.746619 | \n",
- " 11.431429 | \n",
- " 5.614179 | \n",
- " 5.628823 | \n",
- " 0.000000 | \n",
- " 4.706430 | \n",
- " 4.930888 | \n",
- " 9.784892 | \n",
- " ... | \n",
- " 11.843169 | \n",
- " 8.601549 | \n",
- " 9.621358 | \n",
- " 8.769579 | \n",
- " 8.456629 | \n",
- " 7.474558 | \n",
- " 8.417561 | \n",
- " 6.104497 | \n",
- " 5.667736 | \n",
- " 9.560289 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 6.395808 | \n",
- " 12.109947 | \n",
- " 6.009650 | \n",
- " 9.937863 | \n",
- " 3.866721 | \n",
- " 5.789442 | \n",
- " 0.000000 | \n",
- " 4.776288 | \n",
- " 5.291245 | \n",
- " 9.381882 | \n",
- " ... | \n",
- " 11.166194 | \n",
- " 7.013285 | \n",
- " 9.761605 | \n",
- " 9.649581 | \n",
- " 8.391347 | \n",
- " 3.629759 | \n",
- " 7.841489 | \n",
- " 8.009047 | \n",
- " 5.725878 | \n",
- " 10.737478 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 6.366865 | \n",
- " 11.992996 | \n",
- " 6.131477 | \n",
- " 9.838573 | \n",
- " 3.666917 | \n",
- " 5.496674 | \n",
- " 0.377771 | \n",
- " 4.659630 | \n",
- " 5.366084 | \n",
- " 9.468696 | \n",
- " ... | \n",
- " 11.030617 | \n",
- " 7.078497 | \n",
- " 9.706964 | \n",
- " 9.746059 | \n",
- " 8.180204 | \n",
- " 4.538549 | \n",
- " 7.754378 | \n",
- " 8.172509 | \n",
- " 5.470773 | \n",
- " 10.788613 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 7.315204 | \n",
- " 10.569824 | \n",
- " 5.158324 | \n",
- " 11.272986 | \n",
- " 6.146377 | \n",
- " 5.275278 | \n",
- " 0.190837 | \n",
- " 4.332369 | \n",
- " 4.786446 | \n",
- " 9.779253 | \n",
- " ... | \n",
- " 11.423517 | \n",
- " 9.878379 | \n",
- " 9.692445 | \n",
- " 8.226875 | \n",
- " 8.142990 | \n",
- " 7.147757 | \n",
- " 7.979421 | \n",
- " 6.161353 | \n",
- " 5.738662 | \n",
- " 9.470079 | \n",
- "
\n",
- " \n",
- "
\n",
- "
5 rows × 44 columns
\n",
- "
"
- ],
- "text/plain": [
- " cra aceE phoB cyoA amtB appY citC \\\n",
- "1 7.697991 10.651307 5.599989 11.254009 5.128441 5.919468 0.000000 \n",
- "2 7.581105 10.761484 5.746619 11.431429 5.614179 5.628823 0.000000 \n",
- "3 6.395808 12.109947 6.009650 9.937863 3.866721 5.789442 0.000000 \n",
- "4 6.366865 11.992996 6.131477 9.838573 3.666917 5.496674 0.377771 \n",
- "5 7.315204 10.569824 5.158324 11.272986 6.146377 5.275278 0.190837 \n",
- "\n",
- " dpiB dpiA fur ... mdh fis crp rpoH \\\n",
- "1 4.323397 4.761679 9.737595 ... 11.745289 8.936196 9.666416 8.725102 \n",
- "2 4.706430 4.930888 9.784892 ... 11.843169 8.601549 9.621358 8.769579 \n",
- "3 4.776288 5.291245 9.381882 ... 11.166194 7.013285 9.761605 9.649581 \n",
- "4 4.659630 5.366084 9.468696 ... 11.030617 7.078497 9.706964 9.746059 \n",
- "5 4.332369 4.786446 9.779253 ... 11.423517 9.878379 9.692445 8.226875 \n",
- "\n",
- " gadX cspA oxyR soxS dcuR arcA \n",
- "1 8.614542 7.743336 8.209081 6.230069 5.752131 9.547993 \n",
- "2 8.456629 7.474558 8.417561 6.104497 5.667736 9.560289 \n",
- "3 8.391347 3.629759 7.841489 8.009047 5.725878 10.737478 \n",
- "4 8.180204 4.538549 7.754378 8.172509 5.470773 10.788613 \n",
- "5 8.142990 7.147757 7.979421 6.161353 5.738662 9.470079 \n",
- "\n",
- "[5 rows x 44 columns]"
- ]
+ "text/plain": " cra aceE phoB cyoA amtB appY citC \\\n1 7.697991 10.651307 5.599989 11.254009 5.128441 5.919468 0.000000 \n2 7.581105 10.761484 5.746619 11.431429 5.614179 5.628823 0.000000 \n3 6.395808 12.109947 6.009650 9.937863 3.866721 5.789442 0.000000 \n4 6.366865 11.992996 6.131477 9.838573 3.666917 5.496674 0.377771 \n5 7.315204 10.569824 5.158324 11.272986 6.146377 5.275278 0.190837 \n\n dpiB dpiA fur ... mdh fis crp rpoH \\\n1 4.323397 4.761679 9.737595 ... 11.745289 8.936196 9.666416 8.725102 \n2 4.706430 4.930888 9.784892 ... 11.843169 8.601549 9.621358 8.769579 \n3 4.776288 5.291245 9.381882 ... 11.166194 7.013285 9.761605 9.649581 \n4 4.659630 5.366084 9.468696 ... 11.030617 7.078497 9.706964 9.746059 \n5 4.332369 4.786446 9.779253 ... 11.423517 9.878379 9.692445 8.226875 \n\n gadX cspA oxyR soxS dcuR arcA \n1 8.614542 7.743336 8.209081 6.230069 5.752131 9.547993 \n2 8.456629 7.474558 8.417561 6.104497 5.667736 9.560289 \n3 8.391347 3.629759 7.841489 8.009047 5.725878 10.737478 \n4 8.180204 4.538549 7.754378 8.172509 5.470773 10.788613 \n5 8.142990 7.147757 7.979421 6.161353 5.738662 9.470079 \n\n[5 rows x 44 columns]",
+ "text/html": "\n\n
\n \n \n | \n cra | \n aceE | \n phoB | \n cyoA | \n amtB | \n appY | \n citC | \n dpiB | \n dpiA | \n fur | \n ... | \n mdh | \n fis | \n crp | \n rpoH | \n gadX | \n cspA | \n oxyR | \n soxS | \n dcuR | \n arcA | \n
\n \n \n \n 1 | \n 7.697991 | \n 10.651307 | \n 5.599989 | \n 11.254009 | \n 5.128441 | \n 5.919468 | \n 0.000000 | \n 4.323397 | \n 4.761679 | \n 9.737595 | \n ... | \n 11.745289 | \n 8.936196 | \n 9.666416 | \n 8.725102 | \n 8.614542 | \n 7.743336 | \n 8.209081 | \n 6.230069 | \n 5.752131 | \n 9.547993 | \n
\n \n 2 | \n 7.581105 | \n 10.761484 | \n 5.746619 | \n 11.431429 | \n 5.614179 | \n 5.628823 | \n 0.000000 | \n 4.706430 | \n 4.930888 | \n 9.784892 | \n ... | \n 11.843169 | \n 8.601549 | \n 9.621358 | \n 8.769579 | \n 8.456629 | \n 7.474558 | \n 8.417561 | \n 6.104497 | \n 5.667736 | \n 9.560289 | \n
\n \n 3 | \n 6.395808 | \n 12.109947 | \n 6.009650 | \n 9.937863 | \n 3.866721 | \n 5.789442 | \n 0.000000 | \n 4.776288 | \n 5.291245 | \n 9.381882 | \n ... | \n 11.166194 | \n 7.013285 | \n 9.761605 | \n 9.649581 | \n 8.391347 | \n 3.629759 | \n 7.841489 | \n 8.009047 | \n 5.725878 | \n 10.737478 | \n
\n \n 4 | \n 6.366865 | \n 11.992996 | \n 6.131477 | \n 9.838573 | \n 3.666917 | \n 5.496674 | \n 0.377771 | \n 4.659630 | \n 5.366084 | \n 9.468696 | \n ... | \n 11.030617 | \n 7.078497 | \n 9.706964 | \n 9.746059 | \n 8.180204 | \n 4.538549 | \n 7.754378 | \n 8.172509 | \n 5.470773 | \n 10.788613 | \n
\n \n 5 | \n 7.315204 | \n 10.569824 | \n 5.158324 | \n 11.272986 | \n 6.146377 | \n 5.275278 | \n 0.190837 | \n 4.332369 | \n 4.786446 | \n 9.779253 | \n ... | \n 11.423517 | \n 9.878379 | \n 9.692445 | \n 8.226875 | \n 8.142990 | \n 7.147757 | \n 7.979421 | \n 6.161353 | \n 5.738662 | \n 9.470079 | \n
\n \n
\n
5 rows × 44 columns
\n
"
},
- "execution_count": 4,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -410,10 +149,7 @@
"cell_type": "markdown",
"id": "3cb336afa4c23502",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"## Step 1: Verify correctness of the network structure"
@@ -423,10 +159,7 @@
"cell_type": "markdown",
"id": "dd9954532ad35474",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
     "The total number of d-separations implied by this network is large. It takes several hours to find all of them and test them against data. In addition, high-dimensional conditional independence tests can be very unreliable. Hence, it is best to limit the number of variables to condition upon. Here we selected 3. However, the default in this function is 5. Even with max_given equal to 3, it takes around an hour to produce the results."
@@ -434,43 +167,36 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 25,
"id": "946fd227c4da088d",
"metadata": {
- "ExecuteTime": {
- "start_time": "2024-01-18T16:03:06.113484700Z"
- },
"collapsed": false,
- "is_executing": true,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-03T00:12:23.956336800Z",
+ "start_time": "2024-05-02T23:52:55.566431900Z"
}
},
"outputs": [
{
"data": {
+ "text/plain": "Checking d-separations: 0%| | 0/946 [00:00, ?pair/s]",
"application/vnd.jupyter.widget-view+json": {
- "model_id": "202871c1577b4e66b740661b9fe7ded6",
"version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "Checking d-separations: 0%| | 0/946 [00:00, ?pair/s]"
- ]
+ "version_minor": 0,
+ "model_id": "9eab715cf1c34de4967fb0920c01b659"
+ }
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
+ "text/plain": "Checking conditionals: 0%| | 0/498 [00:00, ?it/s]",
"application/vnd.jupyter.widget-view+json": {
- "model_id": "9222fa05751a4948867841688e181838",
"version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "Checking conditionals: 0%| | 0/498 [00:00, ?it/s]"
- ]
+ "version_minor": 0,
+ "model_id": "3de1fa681c7243e0bebef0cbc2848373"
+ }
},
"metadata": {},
"output_type": "display_data"
@@ -479,203 +205,19 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Of the 498 d-separations implied by the network's structure, 202 (40.56%) rejected the null hypothesis at p<0.01.\n",
+ "Of the 498 d-separations implied by the network's structure, 201 (40.36%) rejected the null hypothesis at p<0.01.\n",
"\n",
"Since this is more than 30%, Eliater considers this a major inconsistency and therefore suggests adding appropriate bidirected edges using the eliater.add_ci_undirected_edges() function.\n",
"\n",
- "Finished in 293.75 seconds.\n",
- "\n"
+ "Finished in 1168.38 seconds.\n"
]
},
{
"data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " left | \n",
- " right | \n",
- " given | \n",
- " stats | \n",
- " p | \n",
- " dof | \n",
- " p_adj | \n",
- " p_adj_significant | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " appA | \n",
- " appB | \n",
- " appY | \n",
- " 0.899419 | \n",
- " 5.350841e-94 | \n",
- " None | \n",
- " 2.664719e-91 | \n",
- " True | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " appA | \n",
- " phoB | \n",
- " appY | \n",
- " 0.840764 | \n",
- " 3.572616e-70 | \n",
- " None | \n",
- " 1.775590e-67 | \n",
- " True | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " appA | \n",
- " narL | \n",
- " appY | \n",
- " 0.838726 | \n",
- " 1.583492e-69 | \n",
- " None | \n",
- " 7.854120e-67 | \n",
- " True | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " appA | \n",
- " arcA | \n",
- " appY | \n",
- " 0.833926 | \n",
- " 4.864309e-68 | \n",
- " None | \n",
- " 2.407833e-65 | \n",
- " True | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " appA | \n",
- " rpoS | \n",
- " appY | \n",
- " 0.832139 | \n",
- " 1.693584e-67 | \n",
- " None | \n",
- " 8.366305e-65 | \n",
- " True | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 493 | \n",
- " cyoA | \n",
- " exuT | \n",
- " crp|dpiA | \n",
- " -0.011299 | \n",
- " 8.566703e-01 | \n",
- " None | \n",
- " 1.000000e+00 | \n",
- " False | \n",
- "
\n",
- " \n",
- " 494 | \n",
- " dcuR | \n",
- " rpoH | \n",
- " | \n",
- " 0.102291 | \n",
- " 1.011384e-01 | \n",
- " None | \n",
- " 1.000000e+00 | \n",
- " False | \n",
- "
\n",
- " \n",
- " 495 | \n",
- " cspA | \n",
- " exuT | \n",
- " | \n",
- " -0.142326 | \n",
- " 2.221632e-02 | \n",
- " None | \n",
- " 1.000000e+00 | \n",
- " False | \n",
- "
\n",
- " \n",
- " 496 | \n",
- " btsR | \n",
- " rpoS | \n",
- " | \n",
- " -0.048802 | \n",
- " 4.350762e-01 | \n",
- " None | \n",
- " 1.000000e+00 | \n",
- " False | \n",
- "
\n",
- " \n",
- " 497 | \n",
- " hns | \n",
- " iscR | \n",
- " | \n",
- " -0.027520 | \n",
- " 6.599513e-01 | \n",
- " None | \n",
- " 1.000000e+00 | \n",
- " False | \n",
- "
\n",
- " \n",
- "
\n",
- "
498 rows × 8 columns
\n",
- "
"
- ],
- "text/plain": [
- " left right given stats p dof p_adj \\\n",
- "0 appA appB appY 0.899419 5.350841e-94 None 2.664719e-91 \n",
- "1 appA phoB appY 0.840764 3.572616e-70 None 1.775590e-67 \n",
- "2 appA narL appY 0.838726 1.583492e-69 None 7.854120e-67 \n",
- "3 appA arcA appY 0.833926 4.864309e-68 None 2.407833e-65 \n",
- "4 appA rpoS appY 0.832139 1.693584e-67 None 8.366305e-65 \n",
- ".. ... ... ... ... ... ... ... \n",
- "493 cyoA exuT crp|dpiA -0.011299 8.566703e-01 None 1.000000e+00 \n",
- "494 dcuR rpoH 0.102291 1.011384e-01 None 1.000000e+00 \n",
- "495 cspA exuT -0.142326 2.221632e-02 None 1.000000e+00 \n",
- "496 btsR rpoS -0.048802 4.350762e-01 None 1.000000e+00 \n",
- "497 hns iscR -0.027520 6.599513e-01 None 1.000000e+00 \n",
- "\n",
- " p_adj_significant \n",
- "0 True \n",
- "1 True \n",
- "2 True \n",
- "3 True \n",
- "4 True \n",
- ".. ... \n",
- "493 False \n",
- "494 False \n",
- "495 False \n",
- "496 False \n",
- "497 False \n",
- "\n",
- "[498 rows x 8 columns]"
- ]
+ "text/plain": " left right given stats p dof p_adj \\\n0 appA appB appY 0.899419 5.350841e-94 None 2.664719e-91 \n1 appA phoB appY 0.840764 3.572616e-70 None 1.775590e-67 \n2 appA narL appY 0.838726 1.583492e-69 None 7.854120e-67 \n3 appA arcA appY 0.833926 4.864309e-68 None 2.407833e-65 \n4 appA rpoS appY 0.832139 1.693584e-67 None 8.366305e-65 \n.. ... ... ... ... ... ... ... \n493 gutM modE 0.137754 2.693370e-02 None 1.000000e+00 \n494 cra narL phoB 0.031518 6.143216e-01 None 1.000000e+00 \n495 gutM lrp crp|hns 0.099114 1.122438e-01 None 1.000000e+00 \n496 citC narP 0.093459 1.343524e-01 None 1.000000e+00 \n497 btsR ydeO -0.103919 9.579462e-02 None 1.000000e+00 \n\n p_adj_significant \n0 True \n1 True \n2 True \n3 True \n4 True \n.. ... \n493 False \n494 False \n495 False \n496 False \n497 False \n\n[498 rows x 8 columns]",
+ "text/html": "\n\n
\n \n \n | \n left | \n right | \n given | \n stats | \n p | \n dof | \n p_adj | \n p_adj_significant | \n
\n \n \n \n 0 | \n appA | \n appB | \n appY | \n 0.899419 | \n 5.350841e-94 | \n None | \n 2.664719e-91 | \n True | \n
\n \n 1 | \n appA | \n phoB | \n appY | \n 0.840764 | \n 3.572616e-70 | \n None | \n 1.775590e-67 | \n True | \n
\n \n 2 | \n appA | \n narL | \n appY | \n 0.838726 | \n 1.583492e-69 | \n None | \n 7.854120e-67 | \n True | \n
\n \n 3 | \n appA | \n arcA | \n appY | \n 0.833926 | \n 4.864309e-68 | \n None | \n 2.407833e-65 | \n True | \n
\n \n 4 | \n appA | \n rpoS | \n appY | \n 0.832139 | \n 1.693584e-67 | \n None | \n 8.366305e-65 | \n True | \n
\n \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n 493 | \n gutM | \n modE | \n | \n 0.137754 | \n 2.693370e-02 | \n None | \n 1.000000e+00 | \n False | \n
\n \n 494 | \n cra | \n narL | \n phoB | \n 0.031518 | \n 6.143216e-01 | \n None | \n 1.000000e+00 | \n False | \n
\n \n 495 | \n gutM | \n lrp | \n crp|hns | \n 0.099114 | \n 1.122438e-01 | \n None | \n 1.000000e+00 | \n False | \n
\n \n 496 | \n citC | \n narP | \n | \n 0.093459 | \n 1.343524e-01 | \n None | \n 1.000000e+00 | \n False | \n
\n \n 497 | \n btsR | \n ydeO | \n | \n -0.103919 | \n 9.579462e-02 | \n None | \n 1.000000e+00 | \n False | \n
\n \n
\n
498 rows × 8 columns
\n
"
},
- "execution_count": 5,
+ "execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@@ -693,43 +235,55 @@
]
},
{
- "cell_type": "markdown",
- "id": "de84acea8401aa82",
+ "cell_type": "code",
+ "outputs": [],
+ "source": [
+ "graph = add_ci_undirected_edges(\n",
+ " graph=graph,\n",
+ " data=data,\n",
+ " method=\"pearson\",\n",
+ " significance_level=0.01,\n",
+ " max_conditions=3\n",
+ ")"
+ ],
"metadata": {
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-03T00:38:15.077600500Z",
+ "start_time": "2024-05-03T00:12:23.956336800Z"
}
},
+ "id": "271997c47da05e0",
+ "execution_count": 26
+ },
+ {
+ "cell_type": "markdown",
+ "id": "de84acea8401aa82",
+ "metadata": {
+ "collapsed": false
+ },
"source": [
"## Step 2: Check query identifiability"
]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 27,
"id": "9db1518afcea7377",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-18T15:49:06.598133800Z",
- "start_time": "2024-01-18T15:49:06.565276800Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-03T00:38:15.145603100Z",
+ "start_time": "2024-05-03T00:38:15.078674Z"
}
},
"outputs": [
{
"data": {
- "text/latex": [
- "$\\sum\\limits_{arcA, crp, dcuR, fnr, ihfA, ihfB, modE, narL, rpoD, rpoS} P(arcA | rpoD) P(crp | arcA, ihfA, rpoD, rpoS) P(dcuR | arcA, crp, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoD, rpoS, soxS) P(dpiA | arcA, crp, dcuR, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoD, rpoS, soxS) P(fnr | arcA, crp, fur, ihfA, ihfB, lrp, oxyR, rpoD, rpoS, soxS) P(ihfA | arcA, rpoD, rpoS) P(ihfB | arcA, ihfA, rpoD, rpoS) P(narL | arcA, crp, fnr, fur, ihfA, ihfB, lrp, modE, oxyR, rpoD, rpoS, soxS) P(rpoS | arcA, rpoD) \\sum\\limits_{arcA, crp, dcuR, dpiA, fnr, fur, ihfA, ihfB, lrp, narL, oxyR, rpoD, rpoS, soxS} \\sum\\limits_{aceE, amtB, appA, appB, appY, aspC, btsR, cirA, citC, cra, cspA, cydD, cyoA, dpiB, exuT, fis, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, iscR, mdh, narP, phoB, rpoH, ydeO} P(aceE, amtB, appA, appB, appY, arcA, aspC, btsR, cirA, citC, cra, crp, cspA, cydD, cyoA, dcuR, dpiA, dpiB, exuT, fis, fnr, fur, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, ihfA, ihfB, iscR, lrp, mdh, modE, narL, narP, oxyR, phoB, rpoD, rpoH, rpoS, soxS, ydeO) \\sum\\limits_{arcA, crp, dcuR, dpiA, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoS, soxS} \\sum\\limits_{aceE, amtB, appA, appB, appY, aspC, btsR, cirA, citC, cra, cspA, cydD, cyoA, dpiB, exuT, fis, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, iscR, mdh, narP, phoB, rpoH, ydeO} P(aceE, amtB, appA, appB, appY, arcA, aspC, btsR, cirA, citC, cra, crp, cspA, cydD, cyoA, dcuR, dpiA, dpiB, exuT, fis, fnr, fur, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, ihfA, ihfB, iscR, lrp, mdh, modE, narL, narP, oxyR, phoB, rpoD, rpoH, rpoS, soxS, ydeO)$"
- ],
- "text/plain": [
- "Sum[arcA, crp, dcuR, fnr, ihfA, ihfB, modE, narL, rpoD, rpoS](P(arcA | rpoD) * P(crp | arcA, ihfA, rpoD, rpoS) * P(dcuR | arcA, crp, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoD, rpoS, soxS) * P(dpiA | arcA, crp, dcuR, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoD, rpoS, soxS) * P(fnr | arcA, crp, fur, ihfA, ihfB, lrp, oxyR, rpoD, rpoS, soxS) * P(ihfA | arcA, rpoD, rpoS) * P(ihfB | arcA, ihfA, rpoD, rpoS) * P(narL | arcA, crp, fnr, fur, ihfA, ihfB, lrp, modE, oxyR, rpoD, rpoS, soxS) * P(rpoS | arcA, rpoD) * Sum[arcA, crp, dcuR, dpiA, fnr, fur, ihfA, ihfB, lrp, narL, oxyR, rpoD, rpoS, soxS](Sum[aceE, amtB, appA, appB, appY, aspC, btsR, cirA, citC, cra, cspA, cydD, cyoA, dpiB, exuT, fis, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, iscR, mdh, narP, phoB, rpoH, ydeO](P(aceE, amtB, appA, appB, appY, arcA, aspC, btsR, cirA, citC, cra, crp, cspA, cydD, cyoA, dcuR, dpiA, dpiB, exuT, fis, fnr, fur, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, ihfA, ihfB, iscR, lrp, mdh, modE, narL, narP, oxyR, phoB, rpoD, rpoH, rpoS, soxS, ydeO))) * Sum[arcA, crp, dcuR, dpiA, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoS, soxS](Sum[aceE, amtB, appA, appB, appY, aspC, btsR, cirA, citC, cra, cspA, cydD, cyoA, dpiB, exuT, fis, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, iscR, mdh, narP, phoB, rpoH, ydeO](P(aceE, amtB, appA, appB, appY, arcA, aspC, btsR, cirA, citC, cra, crp, cspA, cydD, cyoA, dcuR, dpiA, dpiB, exuT, fis, fnr, fur, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, ihfA, ihfB, iscR, lrp, mdh, modE, narL, narP, oxyR, phoB, rpoD, rpoH, rpoS, soxS, ydeO))))"
- ]
+ "text/plain": "Sum[arcA, crp, dcuR, fnr, ihfA, ihfB, modE, narL, rpoD, rpoS](P(dcuR | arcA, crp, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoD, rpoS, soxS) * P(dpiA | arcA, crp, dcuR, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoD, rpoS, soxS) * Sum[arcA, crp, dcuR, dpiA, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoS, soxS](Sum[aceE, amtB, appA, appB, appY, aspC, btsR, cirA, citC, cra, cspA, cydD, cyoA, dpiB, exuT, fis, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, iscR, mdh, narP, phoB, rpoH, ydeO](P(aceE, amtB, appA, appB, appY, arcA, aspC, btsR, cirA, citC, cra, crp, cspA, cydD, cyoA, dcuR, dpiA, dpiB, exuT, fis, fnr, fur, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, ihfA, ihfB, iscR, lrp, mdh, modE, narL, narP, oxyR, phoB, rpoD, rpoH, rpoS, soxS, ydeO))) * Sum[lrp, oxyR, soxS](P(arcA | modE, rpoD) * P(crp | arcA, ihfA, lrp, modE, rpoD, rpoS, soxS) * P(fnr | arcA, crp, fur, ihfA, ihfB, lrp, modE, oxyR, rpoD, rpoS, soxS) * P(ihfA | arcA, lrp, modE, rpoD, rpoS) * P(ihfB | arcA, crp, ihfA, lrp, modE, rpoD, rpoS, soxS) * P(lrp | arcA, modE, rpoD, rpoS) * P(modE | rpoD) * P(narL | arcA, crp, fnr, fur, ihfA, ihfB, lrp, modE, oxyR, rpoD, rpoS, soxS) * P(oxyR | arcA, crp, ihfA, ihfB, lrp, modE, rpoD, rpoS, soxS) * P(rpoS | arcA, modE, rpoD) * P(soxS | arcA, ihfA, lrp, modE, rpoD, rpoS)))",
+ "text/latex": "$\\sum\\limits_{arcA, crp, dcuR, fnr, ihfA, ihfB, modE, narL, rpoD, rpoS} P(dcuR | arcA, crp, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoD, rpoS, soxS) P(dpiA | arcA, crp, dcuR, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoD, rpoS, soxS) \\sum\\limits_{arcA, crp, dcuR, dpiA, fnr, fur, ihfA, ihfB, lrp, modE, narL, oxyR, rpoS, soxS} \\sum\\limits_{aceE, amtB, appA, appB, appY, aspC, btsR, cirA, citC, cra, cspA, cydD, cyoA, dpiB, exuT, fis, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, iscR, mdh, narP, phoB, rpoH, ydeO} P(aceE, amtB, appA, appB, appY, arcA, aspC, btsR, cirA, citC, cra, crp, cspA, cydD, cyoA, dcuR, dpiA, dpiB, exuT, fis, fnr, fur, gadX, gutM, hcp, hns, hyaA, hyaB, hyaF, ihfA, ihfB, iscR, lrp, mdh, modE, narL, narP, oxyR, phoB, rpoD, rpoH, rpoS, soxS, ydeO) \\sum\\limits_{lrp, oxyR, soxS} P(arcA | modE, rpoD) P(crp | arcA, ihfA, lrp, modE, rpoD, rpoS, soxS) P(fnr | arcA, crp, fur, ihfA, ihfB, lrp, modE, oxyR, rpoD, rpoS, soxS) P(ihfA | arcA, lrp, modE, rpoD, rpoS) P(ihfB | arcA, crp, ihfA, lrp, modE, rpoD, rpoS, soxS) P(lrp | arcA, modE, rpoD, rpoS) P(modE | rpoD) P(narL | arcA, crp, fnr, fur, ihfA, ihfB, lrp, modE, oxyR, rpoD, rpoS, soxS) P(oxyR | arcA, crp, ihfA, ihfB, lrp, modE, rpoD, rpoS, soxS) P(rpoS | arcA, modE, rpoD) P(soxS | arcA, ihfA, lrp, modE, rpoD, rpoS)$"
},
- "execution_count": 6,
+ "execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
@@ -742,10 +296,7 @@
"cell_type": "markdown",
"id": "4abeb139d356f07c",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"The query is identifiable. Hence, we can proceed to the next step."
@@ -755,10 +306,7 @@
"cell_type": "markdown",
"id": "c61266793a90058b",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"## Step 3: Find nuisance variables and mark them as latent"
@@ -768,10 +316,7 @@
"cell_type": "markdown",
"id": "55c4bfe949593a4a",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"This function finds the nuisance variables for the input graph."
@@ -779,16 +324,13 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 28,
"id": "80a72ccd9a8ce726",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-18T15:49:09.920296900Z",
- "start_time": "2024-01-18T15:49:09.890834200Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-03T00:38:15.339071700Z",
+ "start_time": "2024-05-03T00:38:15.147371500Z"
}
},
"outputs": [
@@ -801,31 +343,9 @@
},
{
"data": {
- "text/plain": [
- "[aceE,\n",
- " amtB,\n",
- " appA,\n",
- " appB,\n",
- " appY,\n",
- " aspC,\n",
- " cirA,\n",
- " citC,\n",
- " cydD,\n",
- " cyoA,\n",
- " dpiB,\n",
- " exuT,\n",
- " gadX,\n",
- " gutM,\n",
- " hcp,\n",
- " hns,\n",
- " hyaA,\n",
- " hyaB,\n",
- " hyaF,\n",
- " mdh,\n",
- " ydeO]"
- ]
+ "text/plain": "[aceE,\n amtB,\n appA,\n appB,\n appY,\n aspC,\n cirA,\n citC,\n cydD,\n cyoA,\n dpiB,\n exuT,\n gadX,\n gutM,\n hcp,\n hns,\n hyaA,\n hyaB,\n hyaF,\n mdh,\n ydeO]"
},
- "execution_count": 7,
+ "execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
@@ -840,10 +360,7 @@
"cell_type": "markdown",
"id": "a10322ec91a50588",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"## Step 4: Simplify the network"
@@ -853,10 +370,7 @@
"cell_type": "markdown",
"id": "62b5c7bd4d5db516",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
     "The following function finds the nuisance variables (step 3), marks them as latent, and then applies Evans' simplification rules to remove the nuisance variables."
@@ -864,2713 +378,20 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 29,
"id": "213d620224db3470",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-18T15:49:14.496076400Z",
- "start_time": "2024-01-18T15:49:14.452122800Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-03T00:38:18.348030500Z",
+ "start_time": "2024-05-03T00:38:15.180664900Z"
}
},
"outputs": [
{
"data": {
- "image/svg+xml": [
- "\n",
- "\n",
- "\n"
- ],
- "text/plain": [
- ""
- ]
+ "text/plain": "",
+ "image/svg+xml": "\n\n\n"
},
"metadata": {},
"output_type": "display_data"
@@ -3585,10 +406,7 @@
"cell_type": "markdown",
"id": "b281730e7c585e8",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"## Step 5: Estimate the query"
@@ -3596,26 +414,21 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 30,
"id": "d325cc3463fc975c",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-18T15:49:25.923745600Z",
- "start_time": "2024-01-18T15:49:23.487772600Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-03T00:47:57.420256300Z",
+ "start_time": "2024-05-03T00:38:18.345307600Z"
}
},
"outputs": [
{
"data": {
- "text/plain": [
- "RegressionResult(coefficients={crp: 0.2699805512369562, fur: 0.08136782815354308, lrp: 0.20910106148203042, oxyR: -0.023609523657045734, rpoD: 0.2393154368401349}, intercept=-2.7968549074150033)"
- ]
+ "text/plain": "RegressionResult(coefficients={crp: 0.3594042098545456, fur: 0.044351916282734175, oxyR: 0.0048560533728563655, rpoD: 0.16057485131792595, soxS: 0.04278193768977509}, intercept=-1.0375040603513126)"
},
- "execution_count": 9,
+ "execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
@@ -3626,32 +439,27 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 31,
"id": "68423679d7276d84",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-18T15:49:29.058593400Z",
- "start_time": "2024-01-18T15:49:26.957634300Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-03T00:58:12.996523300Z",
+ "start_time": "2024-05-03T00:47:57.422364200Z"
}
},
"outputs": [
{
"data": {
- "text/plain": [
- "4.074327478157826"
- ]
+ "text/plain": "4.423519885608114"
},
- "execution_count": 10,
+ "execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "estimate_query(\n",
+ "estimate_query_by_linear_regression(\n",
" new_graph,\n",
" data,\n",
" treatments=treatment,\n",
@@ -3665,10 +473,7 @@
"cell_type": "markdown",
"id": "e449c77f481e1f91",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"The estimated value for the query $\\mathbb{E}[\\text{dpiA} \\mid \\text{fur} = 0]$ is 4.07. This shows that perturbation of gene fur has an increasing effect on gene dpiA."
@@ -3676,26 +481,21 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 32,
"id": "19f7320d83fafa13",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-18T15:49:42.753527600Z",
- "start_time": "2024-01-18T15:49:40.762997400Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-03T01:07:46.160640800Z",
+ "start_time": "2024-05-03T00:58:12.996523300Z"
}
},
"outputs": [
{
"data": {
- "text/plain": [
- "SummaryStatistics(size=258.0, mean=4.074327478157826, std=0.16183613278879422, min=3.6488141980278934, first_quartile=3.9595561559493286, second_quartile=4.0790513115584375, third_quartile=4.148437440188557, max=4.571693863329816)"
- ]
+ "text/plain": "SummaryStatistics(size=258.0, mean=4.423519885608114, std=0.15220522779231585, min=4.099885544063996, first_quartile=4.335753777686982, second_quartile=4.417174129467719, third_quartile=4.50370821066422, max=4.950793944054751)"
},
- "execution_count": 11,
+ "execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
@@ -3714,10 +514,7 @@
"cell_type": "markdown",
"id": "428340f469b57d6b",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"The summary statistics provides more details regarding the query in the form of $P(\\text{dpiA} \\mid \\text{fur}=0)$. This result can be used to generate box plots that shows the estimated distribution of dpiA given a perturbation on fur."
@@ -3727,10 +524,7 @@
"cell_type": "markdown",
"id": "b1074b0af5438b98",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"## Evaluation criterion\n",
@@ -3742,26 +536,21 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 33,
"id": "7fd5c2b13fec2963",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-20T14:21:00.466315Z",
- "start_time": "2024-01-20T14:21:00.439125Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-03T01:07:46.180305100Z",
+ "start_time": "2024-05-03T01:07:46.160640800Z"
}
},
"outputs": [
{
"data": {
- "text/plain": [
- "-0.16938775510204082"
- ]
+ "text/plain": "-0.16938775510204082"
},
- "execution_count": 12,
+ "execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
@@ -3773,1016 +562,27 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 34,
"id": "a7b310ea840d336c",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-18T16:02:03.818715200Z",
- "start_time": "2024-01-18T16:02:01.648965Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-03T01:17:22.697203Z",
+ "start_time": "2024-05-03T01:07:46.184118300Z"
}
},
"outputs": [
{
"data": {
- "image/svg+xml": [
- "\n",
- "\n",
- "\n"
- ],
- "text/plain": [
- ""
- ]
+ "text/plain": "",
+ "image/svg+xml": "\n\n\n"
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
- "x = estimate_query(\n",
+ "x = estimate_query_by_linear_regression(\n",
" new_graph,\n",
" data,\n",
" treatments=treatment,\n",
@@ -4801,10 +601,7 @@
"cell_type": "markdown",
"id": "29942b780596716b",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"### Regression analysis of dpiA vs rpoS"
@@ -4812,1492 +609,20 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 35,
"id": "cdfc0d861ff71909",
"metadata": {
- "ExecuteTime": {
- "end_time": "2024-01-26T03:31:58.552926400Z",
- "start_time": "2024-01-26T03:31:57.652203900Z"
- },
"collapsed": false,
- "jupyter": {
- "outputs_hidden": false
+ "ExecuteTime": {
+ "end_time": "2024-05-03T01:17:24.144280Z",
+ "start_time": "2024-05-03T01:17:22.689857700Z"
}
},
"outputs": [
{
"data": {
- "image/svg+xml": [
- "\n",
- "\n",
- "\n"
- ],
- "text/plain": [
- ""
- ]
+ "text/plain": "",
+ "image/svg+xml": "\n\n\n"
},
"metadata": {},
"output_type": "display_data"