forked from AIPI510/aipi510-fall24
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathta5_code_demo.py
143 lines (118 loc) · 7.35 KB
/
ta5_code_demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
from tabulate import tabulate
from termcolor import colored
import pytest
from unittest.mock import patch
import io
def show_data(df, title="Data Summary"):
    """Print a bold cyan heading followed by ``df`` rendered as a grid table.

    Args:
        df: Tabular data in any form ``tabulate`` accepts with
            ``headers='keys'``. The callers in this file pass a dict that
            maps column names to equal-length lists of values.
        title: Heading printed (after a blank line) above the table.

    Returns:
        None. The heading and the table are written to stdout.
    """
    heading = colored(f"\n{title}", 'cyan', attrs=['bold'])
    print(heading)

    # headers='keys' takes the column names from the data itself, and
    # showindex=False leaves out the row-index column.
    rendered_table = tabulate(
        df,
        headers='keys',
        tablefmt='fancy_grid',
        showindex=False,
    )
    print(rendered_table)
def simpsons_paradox_demo():
    """Run an interactive console walkthrough of Simpson's Paradox.

    The user is asked three yes/no questions through ``input()``. Each
    answer is stripped and lower-cased; anything other than ``"yes"`` or
    ``"no"`` is treated as "unsure". The questions are asked:

    1. before any data is shown,
    2. after the aggregated recovery table,
    3. after the recovery tables split by gender.

    Coloured commentary and the three tables are printed along the way.
    The closing feedback depends on both the final answer and the answer
    given after the aggregated table, so a user who was skeptical at both
    points is not told they "changed their mind", and a skeptic who ends up
    convinced is not told they "still" believe in the drug.

    Returns:
        None. All output is written to stdout.
    """
    print(colored("Welcome to the Simpson's Paradox Demo!", 'green', attrs=['bold']))

    # Step 1: Ask the user if they think the drug is effective before seeing any data
    print(colored("\nWe are studying a new drug for a disease and want to determine its effectiveness.", 'yellow'))
    user_opinion = input(colored("Do you believe the drug is effective? (yes/no): ", 'magenta')).strip().lower()
    if user_opinion == "yes":
        print(colored("\nYou seem confident about the drug's effectiveness. Let's see if the data confirms that!", 'yellow'))
    elif user_opinion == "no":
        print(colored("\nYou're skeptical about the drug. Let's take a look at the data to check.", 'yellow'))
    else:
        print(colored("\nI'll assume you are unsure. Let's analyze the data together.", 'yellow'))

    # Step 2: Display the aggregated data
    # These rows are the column-wise sums of the male and female tables below.
    aggregated_data = {
        'Treatment': ['Treated', 'Control'],
        'Recovered': [20, 16],
        'Did Not Recover': [20, 24],
        'Total': [40, 40],
        'Recovery Rate (%)': [50, 40]
    }
    show_data(aggregated_data, "Aggregated Data")

    # Step 3: Ask the user to analyze the aggregated data
    print(colored("\nBased on the aggregated data above:", 'yellow'))
    aggregated_opinion = input(colored("Do you think the drug is effective now? (yes/no): ", 'magenta')).strip().lower()
    if aggregated_opinion == "yes":
        print(colored("\nThe aggregated data suggests that the drug is 10% more effective than the control.", 'yellow'))
        print(colored("But let's dig deeper and see if this holds true for all groups.", 'yellow'))
    elif aggregated_opinion == "no":
        print(colored("\nYou still don't believe in the drug's effectiveness despite the aggregated data.", 'yellow'))
        print(colored("Let's take a closer look at different subgroups to see if your skepticism is justified.", 'yellow'))
    else:
        print(colored("\nIt seems like you're still unsure. Let's analyze the data by subgroup.", 'yellow'))

    # Step 4: Display disaggregated data for males and females
    # The treated group is mostly male (30 of 40) and the control group is
    # mostly female (30 of 40). Males recover more often than females in
    # both arms, which is what flips the comparison once the groups are pooled.
    disaggregated_data_males = {
        'Group': ['Treated', 'Control'],
        'Recovered': [18, 7],
        'Did Not Recover': [12, 3],
        'Total': [30, 10],
        'Recovery Rate (%)': [60, 70]
    }
    disaggregated_data_females = {
        'Group': ['Treated', 'Control'],
        'Recovered': [2, 9],
        'Did Not Recover': [8, 21],
        'Total': [10, 30],
        'Recovery Rate (%)': [20, 30]
    }
    print(colored("\nNow, let's look at the data broken down by gender (disaggregated data):", 'yellow'))
    show_data(disaggregated_data_males, "Disaggregated Data (Males)")
    show_data(disaggregated_data_females, "Disaggregated Data (Females)")

    # Step 5: Explain Simpson's Paradox and resolve the paradox
    print(colored("\nSurprising, right?", 'cyan'))
    if aggregated_opinion == "yes":
        print(colored("Despite initially thinking the drug was effective based on the aggregated data,", 'yellow'))
        print(colored("the disaggregated data shows that the recovery rate is actually lower for both males and females.", 'yellow'))
    elif aggregated_opinion == "no":
        print(colored("It looks like your skepticism was justified!", 'yellow'))
        print(colored("The disaggregated data shows that the recovery rate is lower for both males and females, even though the aggregated data suggested otherwise.", 'yellow'))
    print(colored("\nThis is an example of Simpson's Paradox.", 'cyan'))
    print(colored("In this case, gender is a confounding factor, which masks the true relationship between the drug and recovery.", 'yellow'))

    # Step 6: Final reflection
    resolved_opinion = input(colored("Now that you see the disaggregated data, do you think the drug is effective? (yes/no): ", 'magenta')).strip().lower()
    # Compare with the answer given after the aggregated table so that the
    # "still believe" / "changed your mind" wording matches what happened.
    was_skeptical = aggregated_opinion == "no"
    if resolved_opinion == "yes":
        if was_skeptical:
            print(colored("\nInteresting! After the subgroup analysis, you now believe the drug is effective.", 'yellow'))
        else:
            print(colored("\nInteresting! Despite the subgroup analysis, you still believe the drug is effective.", 'yellow'))
        print(colored("Remember, it's important to look at the true causal relationships behind the data.", 'yellow'))
    elif resolved_opinion == "no":
        if was_skeptical:
            print(colored("\nYou stayed skeptical, and the disaggregated data backs you up.", 'yellow'))
        else:
            print(colored("\nIt seems you've changed your mind after seeing the disaggregated data.", 'yellow'))
        print(colored("This is the power of analyzing confounders like gender. The drug is not effective for either group.", 'yellow'))
    else:
        print(colored("\nIt seems you're still unsure, but that's okay!", 'yellow'))
        print(colored("Simpson's Paradox can be tricky, and it shows why deeper analysis is so important.", 'yellow'))

    print(colored("\nThank you for participating in this Simpson's Paradox demo!", 'green', attrs=['bold']))
# Helper function to capture printed output
def run_demo_with_inputs(inputs):
    """Run the demo with scripted answers and return everything it printed.

    Args:
        inputs: Answers handed to the demo's ``input()`` calls, one per
            call, in order (used as the mock's ``side_effect``).

    Returns:
        The text written to ``sys.stdout`` while the demo ran.
    """
    captured = io.StringIO()
    # Replace input() with the scripted answers and redirect stdout into an
    # in-memory buffer for the duration of the demo.
    with patch('builtins.input', side_effect=inputs), \
            patch('sys.stdout', new=captured):
        simpsons_paradox_demo()
    return captured.getvalue()
# Test cases for different user inputs
def test_demo_user_thinks_drug_effective_from_start():
    """Answers yes, yes, no: confident at first, then changes mind at the end."""
    transcript = run_demo_with_inputs(['yes', 'yes', 'no'])

    # One fragment per stage of the demo, in the order they are printed.
    expected_fragments = (
        "You seem confident about the drug's effectiveness",
        "The aggregated data suggests that the drug is 10% more effective",
        "Surprising, right?",
        "changed your mind after seeing the disaggregated data",
    )
    for fragment in expected_fragments:
        assert fragment in transcript
def test_demo_user_skeptical_from_start():
    """Answers no, no, no: the skeptic's messages appear at each stage."""
    transcript = run_demo_with_inputs(['no', 'no', 'no'])

    # Fragments from the first prompt, the aggregated step and the reveal.
    expected_fragments = (
        "You're skeptical about the drug.",
        "You still don't believe in the drug's effectiveness",
        "It looks like your skepticism was justified!",
    )
    for fragment in expected_fragments:
        assert fragment in transcript
def test_demo_user_unsure_from_start():
    """Two empty answers (treated as unsure), then a final yes."""
    transcript = run_demo_with_inputs(['', '', 'yes'])

    # An empty answer is neither "yes" nor "no", so the demo's "unsure"
    # branches should be taken for the first two questions.
    expected_fragments = (
        "I'll assume you are unsure. Let's analyze the data together.",
        "It seems like you're still unsure.",
        "Interesting! Despite the subgroup analysis, you still believe the drug is effective.",
    )
    for fragment in expected_fragments:
        assert fragment in transcript
def test_demo_user_changes_mind():
    """Answers yes, yes, no: the change-of-mind wording is printed."""
    transcript = run_demo_with_inputs(['yes', 'yes', 'no'])

    # Believes the drug works through the aggregated step, then answers "no"
    # once the per-gender tables are shown.
    expected_fragments = (
        "You seem confident about the drug's effectiveness",
        "Despite initially thinking the drug was effective based on the aggregated data",
        "It seems you've changed your mind after seeing the disaggregated data.",
    )
    for fragment in expected_fragments:
        assert fragment in transcript
# Start the interactive demo only when this file is executed as a script;
# importing the module (as pytest does to collect the tests above) does not
# prompt for input.
if __name__ == "__main__":
    simpsons_paradox_demo()