Skip to content

Commit

Permalink
added z-score calculator and visualizer
Browse files Browse the repository at this point in the history
  • Loading branch information
statkclee committed May 15, 2024
1 parent 2a4a8b8 commit 906a0a7
Show file tree
Hide file tree
Showing 12 changed files with 3,097 additions and 11 deletions.
292 changes: 292 additions & 0 deletions 04_testing/x_score/index.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,292 @@
---
title: "x-점수 확률계산"
author: "이광춘"
date: today
image: thumbnail.png
categories: ["z-점수", "t-점수", "카이제곱-점수", "확률계산"]
editor_options:
  chunk_output_type: console
---

"z-점수", "t-점수", "카이제곱-점수", 확률계산은 가설 검정과 신뢰구간 계산에서 유용합니다. 많은 통계적 가설 검정에서 z-점수를 사용하여 유의 확률(p-value)을 계산하며, 특정 z-점수에 해당하는 누적 확률을 빠르게 찾을 수 있습니다. 또한, 정규분포를 따르는 데이터의 신뢰구간 계산 시, z-점수를 사용하여 신뢰구간의 상한과 하한을 결정할 수 있습니다. t-점수는 주로 표본 크기가 작거나 모집단의 표준편차를 모를 때 사용되며, t-검정을 통해 평균 간의 차이를 검정하거나 신뢰구간을 계산하는 데 사용됩니다. 카이제곱-점수는 주로 범주형 데이터의 독립성 검정이나 적합도 검정에서 사용되며, 관측된 빈도와 기대 빈도 간의 차이를 평가하는 데 유용합니다.


# Shiny 앱

:::{.column-page}

```{shinylive-r}
#| label: shinylive-testing-score
#| viewerWidth: 800
#| viewerHeight: 700
#| standalone: true
library(shiny)
library(showtext)
showtext_auto()
# UI definition: a sidebar with the score selector, the per-distribution
# inputs and the text results, next to a main panel holding the plots.
ui <- local({
  # Inline-math delimiters for MathJax, so "$...$" in the sidebar is typeset.
  mathjax_config <- tags$div(HTML("<script type='text/x-mathjax-config' >
MathJax.Hub.Config({
tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']]}
});
</script >
"))

  # Which of the three scores the user wants to explore.
  score_selector <- radioButtons(
    "scoreType",
    "점수 선택:",
    c(
      "Z-score" = "z",
      "T-score" = "t",
      "Chi-square 점수" = "chisq"
    )
  )

  # Only the panel whose condition matches the selected score is displayed.
  z_inputs <- conditionalPanel(
    condition = "input.scoreType == 'z'",
    sliderInput(
      "z", "Z-score",
      min = -5, max = 5, step = 0.01, ticks = TRUE, value = 1.96
    )
  )
  t_inputs <- conditionalPanel(
    condition = "input.scoreType == 't'",
    sliderInput(
      "t", "T-score",
      min = -5, max = 5, step = 0.01, ticks = TRUE, value = 1.96
    ),
    numericInput("df_t", "자유도", value = 10, min = 1, step = 1)
  )
  chisq_inputs <- conditionalPanel(
    condition = "input.scoreType == 'chisq'",
    sliderInput(
      "chisq", "Chi-square 점수",
      min = 0, max = 20, step = 0.01, ticks = TRUE, value = 3.84
    ),
    numericInput("df_chisq", "자유도", value = 1, min = 1, step = 1)
  )

  # Sidebar: inputs first, then the three text outputs filled by the server.
  controls <- sidebarPanel(
    score_selector,
    z_inputs,
    t_inputs,
    chisq_inputs,
    withMathJax(),
    p("$P(X \\leq x) =$"),
    textOutput("prob"),
    hr(),
    p("신뢰수준 (양측):"),
    textOutput("conf_level"),
    p("대응하는 점수:"),
    textOutput("score")
  )

  # Main area: the plot rendered by the server as output$plot.
  plots <- mainPanel(
    plotOutput("plot")
  )

  fluidPage(
    titlePanel("통계 검정에 중요한 점수의 변화에 따른 확률 계산"),
    mathjax_config,
    sidebarLayout(controls, plots)
  )
})
# Server logic: converts the selected score into probabilities and draws the
# density and cumulative-distribution plots side by side.
#
# Args:
#   input:  Shiny inputs; reads scoreType, z, t, chisq, df_t and df_chisq.
#   output: Shiny outputs; fills prob, plot, conf_level and score.
server <- function(input, output) {
  # Plotting packages are attached inside the server, as in the original app.
  library(ggplot2)
  library(gridExtra)

  # Colours: distribution curve, shaded area / probability guide, score marker.
  col1 <- "#3B429F"
  col2 <- "#76BED0"
  col3 <- "#F55D3E"

  # Score currently selected by the user, on the scale of the chosen
  # distribution.
  score <- reactive({
    req(input$scoreType)
    switch(input$scoreType,
      "z" = input$z,
      "t" = input$t,
      "chisq" = input$chisq
    )
  })

  # Density, CDF and quantile function of the chosen distribution, with the
  # degrees of freedom already bound. Building them once replaces the switch()
  # that used to be repeated at every call site.
  distribution <- reactive({
    kind <- req(input$scoreType)
    if (kind == "z") {
      return(list(pdf = dnorm, cdf = pnorm, quantile = qnorm))
    }
    df <- if (kind == "t") input$df_t else input$df_chisq
    # An emptied or non-positive "degrees of freedom" box would otherwise
    # propagate NA/NaN into every output; pause the outputs until it is valid.
    req(is.numeric(df), df > 0)
    if (kind == "t") {
      list(
        pdf = function(x) dt(x, df = df),
        cdf = function(q, lower.tail = TRUE) {
          pt(q, df = df, lower.tail = lower.tail)
        },
        quantile = function(p, lower.tail = TRUE) {
          qt(p, df = df, lower.tail = lower.tail)
        }
      )
    } else {
      list(
        pdf = function(x) dchisq(x, df = df),
        cdf = function(q, lower.tail = TRUE) {
          pchisq(q, df = df, lower.tail = lower.tail)
        },
        quantile = function(p, lower.tail = TRUE) {
          qchisq(p, df = df, lower.tail = lower.tail)
        }
      )
    }
  })

  # Upper-tail area beyond |score| (z, t) or beyond the score (chi-square).
  # Asking for the upper tail directly avoids the cancellation in 1 - CDF.
  upper_tail <- reactive({
    x <- score()
    if (input$scoreType != "chisq") {
      x <- abs(x)
    }
    distribution()$cdf(x, lower.tail = FALSE)
  })

  # Confidence level as a proportion. z and t are symmetric, so both tails
  # are removed. The chi-square score is not symmetric, so its level is
  # simply P(X <= x); applying the two-sided formula to it reported 90% for
  # 3.84 (df = 1) and went negative whenever the CDF was below 0.5.
  conf_level <- reactive({
    if (input$scoreType == "chisq") {
      1 - upper_tail()
    } else {
      1 - 2 * upper_tail()
    }
  })

  # P(X <= x). renderText pairs with textOutput and avoids the "[1] " prefix
  # that renderPrint adds.
  output$prob <- renderText({
    format(distribution()$cdf(score()), digits = 7)
  })

  output$plot <- renderPlot({
    kind <- input$scoreType
    x0 <- score()
    dens_cdf <- distribution()
    is_chisq <- kind == "chisq"
    p_x0 <- dens_cdf$cdf(x0)

    # Density restricted to x <= score, so that only that part is filled.
    # Shader idea taken from:
    # https://t-redactyl.io/blog/2016/03/creating-plots-in-r-using-ggplot2-part-9-function-plots.html
    shaded_pdf <- function(x) {
      y <- dens_cdf$pdf(x)
      y[x > x0] <- NA
      y
    }

    x_limits <- if (is_chisq) c(0, max(20, x0 + 2)) else c(-5, 5)
    base_data <- data.frame(x = c(if (is_chisq) 0 else -20, 20))
    score_label <- toupper(kind)
    dist_name <- switch(kind,
      "z" = "표준정규분포",
      "t" = "t 분포",
      "chisq" = "카이제곱 분포"
    )

    # Theme tweaks shared by both panels. Line widths use `linewidth`, which
    # replaced `size` for lines in ggplot2 3.4.0.
    plain_theme <- theme(
      axis.line = element_line(linewidth = 1, colour = "black"),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank(),
      plot.title = element_text(size = 20, family = "Tahoma", face = "bold"),
      text = element_text(family = "Tahoma"),
      axis.text.x = element_text(colour = "black", size = 11),
      axis.text.y = element_text(colour = "black", size = 11)
    )

    # Left panel: density with the area up to the score shaded.
    p1 <- ggplot(base_data, aes(x = x)) +
      stat_function(fun = shaded_pdf, geom = "area", fill = col2, alpha = 0.6) +
      stat_function(fun = dens_cdf$pdf, color = col1, linewidth = 1.4) +
      ggtitle(paste(dist_name, "확률 밀도 함수")) +
      labs(x = "", y = "") +
      theme_bw() +
      scale_x_continuous(limits = x_limits, expand = c(0, 0)) +
      scale_y_continuous(limits = c(0, 0.5), expand = c(0, 0)) +
      geom_vline(xintercept = x0, lty = 2, linewidth = 1.2, color = col3) +
      annotate(
        "text",
        x = if (x0 < 0 || is_chisq) x0 + 0.4 else x0 - 0.4,
        y = shaded_pdf(x0) + 0.05,
        label = score_label,
        parse = TRUE, size = 5, color = col3
      ) +
      plain_theme

    # Right panel: CDF with guides marking the score and P(X <= x).
    p2 <- ggplot(base_data, aes(x = x)) +
      annotate(
        "segment",
        x = x0, xend = x0, y = 0, yend = p_x0,
        color = col3, lty = 2, linewidth = 1.4
      ) +
      annotate(
        "segment",
        x = x_limits[1], xend = x0, y = p_x0, yend = p_x0,
        color = col2, lty = 2, linewidth = 1.4
      ) +
      stat_function(fun = dens_cdf$cdf, color = col1, linewidth = 1.4) +
      ggtitle(paste(dist_name, "누적 분포 함수")) +
      labs(x = "", y = "") +
      theme_bw() +
      scale_x_continuous(limits = x_limits, expand = c(0, 0)) +
      scale_y_continuous(
        limits = c(0, 1.14),
        breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1),
        expand = c(0, 0)
      ) +
      annotate(
        "text",
        x = x0 + 0.4, y = p_x0 - 0.1,
        label = score_label,
        parse = TRUE, size = 5, color = col3
      ) +
      annotate(
        "text",
        x = if (is_chisq) 1 else -3, y = p_x0 + 0.05,
        label = "'P(X' <= x ~ ')'",
        parse = TRUE, size = 5, color = col2
      ) +
      plain_theme

    grid.arrange(p1, p2, nrow = 1)
  })

  # Confidence level, shown as a percentage with two decimals.
  output$conf_level <- renderText({
    paste0(round(conf_level() * 100, 2), "%")
  })

  # Score whose confidence level equals the one displayed. It is inverted
  # from the unrounded tail area, so extreme scores do not collapse to Inf.
  output$score <- renderText({
    score_val <- round(
      distribution()$quantile(upper_tail(), lower.tail = FALSE),
      2
    )
    paste0(score_val)
  })
}
# Launch the app by pairing the UI definition with the server function.
shinyApp(ui, server)
```

:::

# 코딩

```{webr-r}
# Print the cumulative probability, the two-sided confidence level and the
# matching critical value for a Z-score.
#
# Args:
#   z: Numeric scalar, the Z-score.
#
# Returns:
#   NULL, invisibly. The three result lines are written with cat().
calc_prob_z <- function(z) {
  prob <- pnorm(z)
  cat("P(X ≤", z, ") =", prob, "\n")
  # Area in one tail beyond |z|. Requesting the upper tail directly avoids
  # the cancellation in 1 - pnorm(|z|) for large scores.
  tail_prob <- pnorm(abs(z), lower.tail = FALSE)
  conf_level <- 1 - 2 * tail_prob
  # Round for display only. Inverting the rounded percentage turned e.g.
  # z = 5 into 100% and then into a critical value of Inf.
  cat("신뢰수준 (양측):", round(conf_level * 100, 2), "%\n")
  score_val <- round(qnorm(tail_prob, lower.tail = FALSE), 2)
  cat("대응하는 점수:", score_val, "\n")
  invisible(NULL)
}
# Print the cumulative probability, the two-sided confidence level and the
# matching critical value for a t-score.
#
# Args:
#   t:  Numeric scalar, the t-score.
#   df: Degrees of freedom of the t distribution.
#
# Returns:
#   NULL, invisibly. The three result lines are written with cat().
calc_prob_t <- function(t, df) {
  prob <- pt(t, df)
  cat("P(X ≤", t, ") =", prob, "\n")
  # Area in one tail beyond |t|, taken directly from the upper tail so that
  # no precision is lost to 1 - pt(|t|, df).
  tail_prob <- pt(abs(t), df, lower.tail = FALSE)
  conf_level <- 1 - 2 * tail_prob
  # Round for display only. Inverting the rounded percentage made large
  # scores report a critical value of Inf.
  cat("신뢰수준 (양측):", round(conf_level * 100, 2), "%\n")
  score_val <- round(qt(tail_prob, df, lower.tail = FALSE), 2)
  cat("대응하는 점수:", score_val, "\n")
  invisible(NULL)
}
# Print the cumulative probability, the confidence level and the matching
# critical value for a chi-square score.
#
# Args:
#   chisq: Numeric scalar, the chi-square score (non-negative).
#   df:    Degrees of freedom of the chi-square distribution.
#
# Returns:
#   NULL, invisibly. The three result lines are written with cat().
calc_prob_chisq <- function(chisq, df) {
  prob <- pchisq(chisq, df)
  cat("P(X ≤", chisq, ") =", prob, "\n")
  # Area above the score, taken directly from the upper tail so that no
  # precision is lost to 1 - pchisq() for large scores.
  tail_prob <- pchisq(chisq, df, lower.tail = FALSE)
  conf_level <- 1 - tail_prob
  # NOTE(review): the label says two-sided, but the value printed is the
  # one-sided P(X <= x); the wording is kept so existing output is unchanged.
  # Round for display only. Inverting the rounded percentage made large
  # scores report a critical value of Inf.
  cat("신뢰수준 (양측):", round(conf_level * 100, 2), "%\n")
  score_val <- round(qchisq(tail_prob, df, lower.tail = FALSE), 2)
  cat("대응하는 점수:", score_val, "\n")
  invisible(NULL)
}
# Worked examples. The commented lines show the expected console output;
# values marked with an approximation sign are given to about five digits.

# Z-score example
calc_prob_z(1.96)
# P(X ≤ 1.96 ) = 0.9750021
# 신뢰수준 (양측): 95 %
# 대응하는 점수: 1.96

# t-score example: 2.228 is the two-sided 95% critical value for df = 10
# (2.262, used previously, is the critical value for df = 9, not df = 10).
calc_prob_t(2.228, 10)
# P(X ≤ 2.228 ) ≈ 0.97499
# 신뢰수준 (양측): 95 %
# 대응하는 점수: 2.23

# Chi-square score example
calc_prob_chisq(3.84, 1)
# P(X ≤ 3.84 ) ≈ 0.94996
# 신뢰수준 (양측): 95 %
# 대응하는 점수: 3.84
```


Loading

0 comments on commit 906a0a7

Please sign in to comment.