diff --git a/04_testing/x_score/index.qmd b/04_testing/x_score/index.qmd new file mode 100644 index 0000000..44c306b --- /dev/null +++ b/04_testing/x_score/index.qmd @@ -0,0 +1,292 @@ +--- +title: "x-점수 확률계산" +author: "이광춘" +date: today +image: thumbnail.png +categories: ["z-점수", "t-점수", "카이제곱-점수", "확률계산"] +editor_options: + chunk_output_type: console +--- + +"z-점수", "t-점수", "카이제곱-점수", 확률계산은 가설 검정과 신뢰구간 계산에서 유용합니다. 많은 통계적 가설 검정에서 z-점수를 사용하여 유의 확률(p-value)을 계산하며, 특정 z-점수에 해당하는 누적 확률을 빠르게 찾을 수 있습니다. 또한, 정규분포를 따르는 데이터의 신뢰구간 계산 시, z-점수를 사용하여 신뢰구간의 상한과 하한을 결정할 수 있습니다. t-점수는 주로 표본 크기가 작거나 모집단의 표준편차를 모를 때 사용되며, t-검정을 통해 평균 간의 차이를 검정하거나 신뢰구간을 계산하는 데 사용됩니다. 카이제곱-점수는 주로 범주형 데이터의 독립성 검정이나 적합도 검정에서 사용되며, 관측된 빈도와 기대 빈도 간의 차이를 평가하는 데 유용합니다. + + +# Shiny 앱 + +:::{.column-page} + +```{shinylive-r} +#| label: shinylive-testing-score +#| viewerWidth: 800 +#| viewerHeight: 700 +#| standalone: true + +library(shiny) +library(showtext) +showtext_auto() + +# Define UI for application that draws a histogram +ui <- fluidPage( + # Application title + titlePanel("통계 검정에 중요한 점수의 변화에 따른 확률 계산"), + + tags$div(HTML(" + ")), + + # Sidebar with inputs and options + sidebarLayout( + sidebarPanel( + radioButtons("scoreType", "점수 선택:", + c("Z-score" = "z", + "T-score" = "t", + "Chi-square 점수" = "chisq")), + conditionalPanel( + condition = "input.scoreType == 'z'", + sliderInput("z", "Z-score", min = -5, max = 5, step = 0.01, ticks = TRUE, value = 1.96) + ), + conditionalPanel( + condition = "input.scoreType == 't'", + sliderInput("t", "T-score", min = -5, max = 5, step = 0.01, ticks = TRUE, value = 1.96), + numericInput("df_t", "자유도", value = 10, min = 1, step = 1) + ), + conditionalPanel( + condition = "input.scoreType == 'chisq'", + sliderInput("chisq", "Chi-square 점수", min = 0, max = 20, step = 0.01, ticks = TRUE, value = 3.84), + numericInput("df_chisq", "자유도", value = 1, min = 1, step = 1) + ), + withMathJax(), + p("$P(X \\leq x) =$"), + textOutput("prob"), + hr(), + p("신뢰수준 (양측):"), + textOutput("conf_level"), + p("대응하는 점수:"), + textOutput("score") + ), + + # Show a plot of the generated distribution + mainPanel( + plotOutput("plot") + ) + ) +) + +# Define server logic required to draw a histogram +server <- function(input, output) { + + score <- reactive({ + switch(input$scoreType, + "z" = input$z, + "t" = input$t, + "chisq" = input$chisq) + }) + + output$prob <- renderPrint({ + switch(input$scoreType, + "z" = pnorm(score()), + "t" = pt(score(), df = input$df_t), + "chisq" = pchisq(score(), df = input$df_chisq)) + }) + + library(ggplot2) + library(gridExtra) + + # manually save colors + col1 <- "#3B429F" + col2 <- "#76BED0" + col3 <- "#F55D3E" + + output$plot <- renderPlot({ + # useful "shader" function taken from: https://t-redactyl.io/blog/2016/03/creating-plots-in-r-using-ggplot2-part-9-function-plots.html + funcShaded <- function(x) { + y <- switch(input$scoreType, + "z" = dnorm(x), + "t" = dt(x, df = input$df_t), + "chisq" = dchisq(x, df = input$df_chisq)) + y[x > score()] <- NA + return(y) + } + + p1 <- ggplot(data.frame(x = c(ifelse(input$scoreType == "chisq", 0, -20), 20)), aes(x = x)) + + stat_function(fun=funcShaded, geom="area", fill=col2, alpha=0.6) + + stat_function(fun = switch(input$scoreType, + "z" = dnorm, + "t" = function(x) dt(x, df = input$df_t), + "chisq" = function(x) dchisq(x, df = input$df_chisq)), + color=col1, size = 1.4) + + ggtitle(switch(input$scoreType, + "z" = "표준정규분포 확률 밀도 함수", + "t" = "t 분포 확률 밀도 함수", + "chisq" = "카이제곱 분포 확률 밀도 함수")) + + labs(x="", y="") + + theme_bw() + + scale_x_continuous(limits = c(ifelse(input$scoreType == "chisq", 0, -5), + ifelse(input$scoreType == "chisq", max(20, score() + 2), 5)), + expand = c(0, 0)) + + scale_y_continuous(limits = c(0, 0.5), expand = c(0, 0)) + + geom_vline(xintercept=score(), lty=2, size=1.2, color=col3) + + annotate("text", x=ifelse(score()<0 | input$scoreType == "chisq", score() + 0.4, score() - 0.4), + y=funcShaded(score()) + 0.05, label=toupper(input$scoreType), + parse=TRUE, size=5, color=col3) + + theme(axis.line = element_line(size=1, colour = "black"), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.border = element_blank(), + panel.background = element_blank(), + plot.title = element_text(size = 20, family = "Tahoma", face = "bold"), + text=element_text(family="Tahoma"), + axis.text.x=element_text(colour="black", size = 11), + axis.text.y=element_text(colour="black", size = 11)) + + p2 <- ggplot(data.frame(x = c(ifelse(input$scoreType == "chisq", 0, -20), 20)), aes(x = x)) + + annotate("segment", x=score(), xend=score(), + y=0, yend=switch(input$scoreType, + "z" = pnorm(score()), + "t" = pt(score(), df = input$df_t), + "chisq" = pchisq(score(), df = input$df_chisq)), + color=col3, lty=2, size=1.4) + + annotate("segment", x=ifelse(input$scoreType == "chisq", 0, -5), xend=score(), + y=switch(input$scoreType, + "z" = pnorm(score()), + "t" = pt(score(), df = input$df_t), + "chisq" = pchisq(score(), df = input$df_chisq)), + yend=switch(input$scoreType, + "z" = pnorm(score()), + "t" = pt(score(), df = input$df_t), + "chisq" = pchisq(score(), df = input$df_chisq)), + color=col2, lty=2, size=1.4) + + stat_function(fun = switch(input$scoreType, + "z" = pnorm, + "t" = function(x) pt(x, df = input$df_t), + "chisq" = function(x) pchisq(x, df = input$df_chisq)), + color=col1, size = 1.4) + + ggtitle(switch(input$scoreType, + "z" = "표준정규분포 누적 분포 함수", + "t" = "t 분포 누적 분포 함수", + "chisq" = "카이제곱 분포 누적 분포 함수")) + + labs(x="", y="") + + theme_bw() + + scale_x_continuous(limits = c(ifelse(input$scoreType == "chisq", 0, -5), + ifelse(input$scoreType == "chisq", max(20, score() + 2), 5)), + expand = c(0, 0)) + + scale_y_continuous(limits = c(0, 1.14), breaks=c(0, 0.2, 0.4, 0.6, 0.8, 1), expand = c(0, 0)) + + annotate("text", x=score() + 0.4, + y=switch(input$scoreType, + "z" = pnorm(score()), + "t" = pt(score(), df = input$df_t), + "chisq" = pchisq(score(), df = input$df_chisq)) - 0.1, + label=toupper(input$scoreType), + parse=TRUE, size=5, color=col3) + + annotate("text", x=ifelse(input$scoreType == "chisq", 1, -3), + y=(switch(input$scoreType, + "z" = pnorm(score()), + "t" = pt(score(), df = input$df_t), + "chisq" = pchisq(score(), df = input$df_chisq)) + 0.05), + label=("'P(X' <= x ~ ')'"), + parse=TRUE, size=5, color=col2) + + theme(axis.line = element_line(size=1, colour = "black"), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.border = element_blank(), + panel.background = element_blank(), + plot.title = element_text(size = 20, family = "Tahoma", face = "bold"), + text=element_text(family="Tahoma"), + axis.text.x=element_text(colour="black", size = 11), + axis.text.y=element_text(colour="black", size = 11)) + + grid.arrange(p1, p2, nrow=1) + }) + + # 신뢰수준 계산 + output$conf_level <- renderText({ + conf_level <- round((1 - 2 * (1 - switch(input$scoreType, + "z" = pnorm(abs(score())), + "t" = pt(abs(score()), df = input$df_t), + "chisq" = pchisq(score(), df = input$df_chisq)))) * 100, 2) + paste0(conf_level, "%") + }) + + # 양측 검정에서의 점수 계산 + output$score <- renderText({ + conf_level <- (1 - 2 * (1 - switch(input$scoreType, + "z" = pnorm(abs(score())), + "t" = pt(abs(score()), df = input$df_t), + "chisq" = pchisq(score(), df = input$df_chisq)))) + score_val <- round(switch(input$scoreType, + "z" = qnorm(1 - (1 - conf_level) / 2), + "t" = qt(1 - (1 - conf_level) / 2, df = input$df_t), + "chisq" = qchisq(conf_level, df = input$df_chisq)), 2) + paste0(score_val) + }) +} + +# Run the application +shinyApp(ui = ui, server = server) + +``` + +::: + +# 코딩 + +```{webr-r} +# Z-score에 대한 확률 계산 함수 +calc_prob_z <- function(z) { + prob <- pnorm(z) + cat("P(X ≤", z, ") =", prob, "\n") + + conf_level <- round((1 - 2 * (1 - pnorm(abs(z)))) * 100, 2) + cat("신뢰수준 (양측):", conf_level, "%\n") + + score_val <- round(qnorm(1 - (1 - conf_level/100) / 2), 2) + cat("대응하는 점수:", score_val, "\n") +} + +# t-score에 대한 확률 계산 함수 +calc_prob_t <- function(t, df) { + prob <- pt(t, df) + cat("P(X ≤", t, ") =", prob, "\n") + + conf_level <- round((1 - 2 * (1 - pt(abs(t), df))) * 100, 2) + cat("신뢰수준 (양측):", conf_level, "%\n") + + score_val <- round(qt(1 - (1 - conf_level/100) / 2, df), 2) + cat("대응하는 점수:", score_val, "\n") +} + +# 카이제곱-점수에 대한 확률 계산 함수 +calc_prob_chisq <- function(chisq, df) { + prob <- pchisq(chisq, df) + cat("P(X ≤", chisq, ") =", prob, "\n") + + conf_level <- round(pchisq(chisq, df) * 100, 2) + cat("신뢰수준 (양측):", conf_level, "%\n") + + score_val <- round(qchisq(conf_level/100, df), 2) + cat("대응하는 점수:", score_val, "\n") +} + +# Z-score 예시 +calc_prob_z(1.96) +# P(X ≤ 1.96 ) = 0.9750021 +# 신뢰수준 (양측): 95 % +# 대응하는 점수: 1.96 + +# t-score 예시 +calc_prob_t(2.262, 10) +# P(X ≤ 2.262 ) = 0.9750022 +# 신뢰수준 (양측): 95 % +# 대응하는 점수: 2.23 + +# 카이제곱-점수 예시 +calc_prob_chisq(3.84, 1) +# P(X ≤ 3.84 ) = 0.9500042 +# 신뢰수준 (양측): 95 % +# 대응하는 점수: 3.84 +``` + + diff --git a/04_testing/x_score/shiny/app.R b/04_testing/x_score/shiny/app.R new file mode 100644 index 0000000..734ddc0 --- /dev/null +++ b/04_testing/x_score/shiny/app.R @@ -0,0 +1,205 @@ +library(shiny) +library(showtext) +showtext_auto() + +# Define UI for application that draws a histogram +ui <- fluidPage( + # Application title + titlePanel("통계 검정에 중요한 점수의 변화에 따른 확률 계산"), + + tags$div(HTML(" + ")), + + # Sidebar with inputs and options + sidebarLayout( + sidebarPanel( + radioButtons("scoreType", "점수 선택:", + c("Z-score" = "z", + "T-score" = "t", + "Chi-square 점수" = "chisq")), + conditionalPanel( + condition = "input.scoreType == 'z'", + sliderInput("z", "Z-score", min = -5, max = 5, step = 0.01, ticks = TRUE, value = 1.96) + ), + conditionalPanel( + condition = "input.scoreType == 't'", + sliderInput("t", "T-score", min = -5, max = 5, step = 0.01, ticks = TRUE, value = 1.96), + numericInput("df_t", "자유도", value = 10, min = 1, step = 1) + ), + conditionalPanel( + condition = "input.scoreType == 'chisq'", + sliderInput("chisq", "Chi-square 점수", min = 0, max = 20, step = 0.01, ticks = TRUE, value = 3.84), + numericInput("df_chisq", "자유도", value = 1, min = 1, step = 1) + ), + withMathJax(), + p("$P(X \\leq x) =$"), + textOutput("prob"), + hr(), + p("신뢰수준 (양측):"), + textOutput("conf_level"), + p("대응하는 점수:"), + textOutput("score") + ), + + # Show a plot of the generated distribution + mainPanel( + plotOutput("plot") + ) + ) +) + +# Define server logic required to draw a histogram +server <- function(input, output) { + + score <- reactive({ + switch(input$scoreType, + "z" = input$z, + "t" = input$t, + "chisq" = input$chisq) + }) + + output$prob <- renderPrint({ + switch(input$scoreType, + "z" = pnorm(score()), + "t" = pt(score(), df = input$df_t), + "chisq" = pchisq(score(), df = input$df_chisq)) + }) + + library(ggplot2) + library(gridExtra) + + # manually save colors + col1 <- "#3B429F" + col2 <- "#76BED0" + col3 <- "#F55D3E" + + output$plot <- renderPlot({ + # useful "shader" function taken from: https://t-redactyl.io/blog/2016/03/creating-plots-in-r-using-ggplot2-part-9-function-plots.html + funcShaded <- function(x) { + y <- switch(input$scoreType, + "z" = dnorm(x), + "t" = dt(x, df = input$df_t), + "chisq" = dchisq(x, df = input$df_chisq)) + y[x > score()] <- NA + return(y) + } + + p1 <- ggplot(data.frame(x = c(ifelse(input$scoreType == "chisq", 0, -20), 20)), aes(x = x)) + + stat_function(fun=funcShaded, geom="area", fill=col2, alpha=0.6) + + stat_function(fun = switch(input$scoreType, + "z" = dnorm, + "t" = function(x) dt(x, df = input$df_t), + "chisq" = function(x) dchisq(x, df = input$df_chisq)), + color=col1, size = 1.4) + + ggtitle(switch(input$scoreType, + "z" = "표준정규분포 확률 밀도 함수", + "t" = "t 분포 확률 밀도 함수", + "chisq" = "카이제곱 분포 확률 밀도 함수")) + + labs(x="", y="") + + theme_bw() + + scale_x_continuous(limits = c(ifelse(input$scoreType == "chisq", 0, -5), + ifelse(input$scoreType == "chisq", max(20, score() + 2), 5)), + expand = c(0, 0)) + + scale_y_continuous(limits = c(0, 0.5), expand = c(0, 0)) + + geom_vline(xintercept=score(), lty=2, size=1.2, color=col3) + + annotate("text", x=ifelse(score()<0 | input$scoreType == "chisq", score() + 0.4, score() - 0.4), + y=funcShaded(score()) + 0.05, label=toupper(input$scoreType), + parse=TRUE, size=5, color=col3) + + theme(axis.line = element_line(size=1, colour = "black"), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.border = element_blank(), + panel.background = element_blank(), + plot.title = element_text(size = 20, family = "Tahoma", face = "bold"), + text=element_text(family="Tahoma"), + axis.text.x=element_text(colour="black", size = 11), + axis.text.y=element_text(colour="black", size = 11)) + + p2 <- ggplot(data.frame(x = c(ifelse(input$scoreType == "chisq", 0, -20), 20)), aes(x = x)) + + annotate("segment", x=score(), xend=score(), + y=0, yend=switch(input$scoreType, + "z" = pnorm(score()), + "t" = pt(score(), df = input$df_t), + "chisq" = pchisq(score(), df = input$df_chisq)), + color=col3, lty=2, size=1.4) + + annotate("segment", x=ifelse(input$scoreType == "chisq", 0, -5), xend=score(), + y=switch(input$scoreType, + "z" = pnorm(score()), + "t" = pt(score(), df = input$df_t), + "chisq" = pchisq(score(), df = input$df_chisq)), + yend=switch(input$scoreType, + "z" = pnorm(score()), + "t" = pt(score(), df = input$df_t), + "chisq" = pchisq(score(), df = input$df_chisq)), + color=col2, lty=2, size=1.4) + + stat_function(fun = switch(input$scoreType, + "z" = pnorm, + "t" = function(x) pt(x, df = input$df_t), + "chisq" = function(x) pchisq(x, df = input$df_chisq)), + color=col1, size = 1.4) + + ggtitle(switch(input$scoreType, + "z" = "표준정규분포 누적 분포 함수", + "t" = "t 분포 누적 분포 함수", + "chisq" = "카이제곱 분포 누적 분포 함수")) + + labs(x="", y="") + + theme_bw() + + scale_x_continuous(limits = c(ifelse(input$scoreType == "chisq", 0, -5), + ifelse(input$scoreType == "chisq", max(20, score() + 2), 5)), + expand = c(0, 0)) + + scale_y_continuous(limits = c(0, 1.14), breaks=c(0, 0.2, 0.4, 0.6, 0.8, 1), expand = c(0, 0)) + + annotate("text", x=score() + 0.4, + y=switch(input$scoreType, + "z" = pnorm(score()), + "t" = pt(score(), df = input$df_t), + "chisq" = pchisq(score(), df = input$df_chisq)) - 0.1, + label=toupper(input$scoreType), + parse=TRUE, size=5, color=col3) + + annotate("text", x=ifelse(input$scoreType == "chisq", 1, -3), + y=(switch(input$scoreType, + "z" = pnorm(score()), + "t" = pt(score(), df = input$df_t), + "chisq" = pchisq(score(), df = input$df_chisq)) + 0.05), + label=("'P(X' <= x ~ ')'"), + parse=TRUE, size=5, color=col2) + + theme(axis.line = element_line(size=1, colour = "black"), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.border = element_blank(), + panel.background = element_blank(), + plot.title = element_text(size = 20, family = "Tahoma", face = "bold"), + text=element_text(family="Tahoma"), + axis.text.x=element_text(colour="black", size = 11), + axis.text.y=element_text(colour="black", size = 11)) + + grid.arrange(p1, p2, nrow=1) + }) + + # 신뢰수준 계산 + output$conf_level <- renderText({ + conf_level <- round((1 - 2 * (1 - switch(input$scoreType, + "z" = pnorm(abs(score())), + "t" = pt(abs(score()), df = input$df_t), + "chisq" = pchisq(score(), df = input$df_chisq)))) * 100, 2) + paste0(conf_level, "%") + }) + + # 양측 검정에서의 점수 계산 + output$score <- renderText({ + conf_level <- (1 - 2 * (1 - switch(input$scoreType, + "z" = pnorm(abs(score())), + "t" = pt(abs(score()), df = input$df_t), + "chisq" = pchisq(score(), df = input$df_chisq)))) + score_val <- round(switch(input$scoreType, + "z" = qnorm(1 - (1 - conf_level) / 2), + "t" = qt(1 - (1 - conf_level) / 2, df = input$df_t), + "chisq" = qchisq(conf_level, df = input$df_chisq)), 2) + paste0(score_val) + }) +} + +# Run the application +shinyApp(ui = ui, server = server) diff --git a/04_testing/x_score/thumbnail.png b/04_testing/x_score/thumbnail.png new file mode 100644 index 0000000..947eafb Binary files /dev/null and b/04_testing/x_score/thumbnail.png differ diff --git a/05_infer/ci/shiny/app.R b/05_infer/ci/shiny/app.R index 54789d5..1559d93 100644 --- a/05_infer/ci/shiny/app.R +++ b/05_infer/ci/shiny/app.R @@ -1,6 +1,8 @@ library(shiny) library(ggplot2) library(tidyverse) +library(showtext) +showtext_auto() ui <- fluidPage( title = "신뢰구간 모의실험", diff --git a/app.R b/app.R new file mode 100644 index 0000000..0303f46 --- /dev/null +++ b/app.R @@ -0,0 +1,189 @@ +library(shiny) + +library(ggplot2) +library(grid) +library(gridExtra) + +# Define UI for application that draws a histogram +ui <- fluidPage( + + # Application title + titlePanel("Bayes' Rule Visualization"), + + + tags$div(HTML(" + ")), + + verticalLayout( + withMathJax(), + p(""), + p("Suppose that for two events, $\\color{red}{A}$ and $\\color{blue}{B}$, we know $P(\\color{red}{A})$, $P(\\color{blue}{B})$, and $P(\\color{blue}{B}|\\color{red}{A})$... can we find $P(\\color{red}{A}|\\color{blue}{B})$?"), + p("Yes! To see why, remember that we can write $P(\\color{red}{A} \\& \\color{blue}{B})$ in two ways: $ P(\\color{blue}{B}|\\color{red}{A}) \\times P(\\color{red}{A}) = P(\\color{red}{A} \\& \\color{blue}{B}) = P(\\color{red}{A}|\\color{blue}{B}) \\times P(\\color{blue}{B}) $"), + p("This means that the yellow area on both squares equals $P(\\color{red}{A} \\& \\color{blue}{B})$. So if we have $P(\\color{red}{A})$, $P(\\color{blue}{B})$, and $P(\\color{blue}{B}|\\color{red}{A})$, then we can calculate: $ P(\\color{red}{A}|\\color{blue}{B}) = \\frac{ P(\\color{blue}{B}|\\color{red}{A}) \\times P(\\color{red}{A}) }{P(\\color{blue}{B})} $"), + p(""), + p(""), + p(""), + + # Sidebar with a slider input for number of bins + sidebarLayout( + sidebarPanel( + sliderInput("p.a", "Probability of A:", value=0.5, step=0.01, min = 0, max = 1), + sliderInput("p.b", "Probability of B:", value=0.8, step=0.01, min = 0, max = 1), + sliderInput("p.b.given.a", "Probability of B given A:", value=0.6, step=0.01, min = 0, max = 1) + ), + + # Show a plot of the generated distribution + mainPanel( + + # Output: Tabset w/ plot, summary, and table ---- + tabsetPanel(type = "tabs", + tabPanel("Visualization", br(), plotOutput("bayes_viz")), + tabPanel("Raw numbers", br(), br(), br(), tableOutput("table")) + ), + + ) + ) + ) +) + +# Define server logic +server <- function(input, output) { + + + output$table <- renderTable({ + p.a <- input$p.a + p.b <- input$p.b + p.b.given.a <- input$p.b.given.a + p.a.and.b <- p.b.given.a * p.a + p.a.given.b <- (p.b.given.a * p.a)/p.b + dat <- cbind(p.a, p.b, p.b.given.a, p.a.and.b, p.a.given.b) + datt <- t(dat) + datt <- round(datt, digits=3) + labels <- c("P(A)", "P(B)", "P(B|A)", "P(A&B)", "P(A|B)") + type <- c("Input", "Input", "Input", "Output", "Output") + datt <- cbind(labels, datt, type) + colnames(datt) <- c("Variable", "Value", "Type") + datt + }) + + + output$bayes_viz <- renderPlot({ + + p.a <- input$p.a + p.b <- input$p.b + p.b.given.a <- input$p.b.given.a + # use Bayes' rule to compute P(A|B) + p.a.given.b <- (p.b.given.a * p.a)/p.b + p.a.and.b <- p.b.given.a * p.a + + # create dataset for blank square plot + x <- c(0,1) + y <- c(0,1) + df <- expand.grid(x, y) + names(df) <- c("x", "y") + + + p <- ggplot(df) + + # bottom left + geom_rect(xmin = 0, xmax = p.a, ymin = 0, ymax = 1-p.b.given.a, fill = "#a6cee3") + + # top left + geom_rect(xmin = 0, xmax = p.a, ymin = 1-p.b.given.a, ymax = 1, fill = "#f3f470") + + # RHS (not broken up into 2 blocks, because we don't actually need to know P(B|¬A)) + geom_rect(xmin = p.a, xmax = 1, ymin = 0, ymax = 1, fill = "#1f78b4") + + labs(title="Factoring by P(A)") + + coord_cartesian(clip = "off") + coord_fixed(ratio=1, ylim=c(0, 1.3), xlim=c(-0.3, 1)) + + theme(plot.margin= unit(c(-3,0,0,0), "lines"), + plot.title = element_text(margin=margin(b = -31, unit = "pt"), face="bold", size=18), + panel.background = element_rect(fill = "white", + colour = "white")) + + + p1 = p + annotation_custom(grob = textGrob("P(B | A)"), + xmin = -0.2, xmax = -0.2, ymin = 1-p.b.given.a, ymax = 1) + + annotation_custom(grob = linesGrob(arrow=arrow(type="open", ends="both", length=unit(2,"mm")), + gp=gpar(col="black", lwd=1.5)), + xmin = -0.05, xmax = -0.05, ymin = 1-p.b.given.a, ymax = 1) + + p1 = p1 + annotation_custom(grob = textGrob("P(¬B | A)"), + xmin = -0.2, xmax = -0.2, ymin = 0, ymax = 1-p.b.given.a) + + annotation_custom(grob = linesGrob(arrow=arrow(type="open", ends="both", length=unit(2,"mm")), + gp=gpar(col="black", lwd=1.5)), + xmin = -0.05, xmax = -0.05, ymin = 0, ymax = 1-p.b.given.a) + + + p1 = p1 + annotation_custom(grob = textGrob("P(A)"), + xmin = 0, xmax = p.a, ymin = 1.1, ymax = 1.1) + + annotation_custom(grob = linesGrob(arrow=arrow(type="open", ends="both", length=unit(2,"mm")), + gp=gpar(col="black", lwd=1.5)), + xmin = 0, xmax = p.a, ymin = 1.05, ymax=1.05) + + p1 = p1 + annotation_custom(grob = textGrob("P(¬A)"), + xmin = p.a, xmax = 1, ymin = 1.1, ymax = 1.1) + + annotation_custom(grob = linesGrob(arrow=arrow(type="open", ends="both", length=unit(2,"mm")), + gp=gpar(col="black", lwd=1.5)), + xmin = p.a, xmax = 1, ymin = 1.05, ymax=1.05) + + + p1 = p1 + annotation_custom(grob = textGrob("P(A&B)"), + xmin = 0, xmax = p.a, ymin = 1-p.b.given.a, ymax = 1) + + p2 <- ggplot(df) + + # bottom left + geom_rect(xmin = 0, xmax = p.b, ymin = 0, ymax = 1-p.a.given.b, fill = "#b7aef0") + + # top left + geom_rect(xmin = 0, xmax = p.b, ymin = 1-p.a.given.b, ymax = 1, fill = "#f3f470") + + # RHS (not broken up into 2 blocks, because we don't actually need to know P(B|¬A)) + geom_rect(xmin = p.b, xmax = 1, ymin = 0, ymax = 1, fill = "#7b72b8") + + labs(title="Factoring by P(B)") + + coord_cartesian(clip = "off") + coord_fixed(ratio=1, ylim=c(0, 1.3), xlim=c(-0.3, 1)) + + theme(plot.margin= unit(c(-3,0,0,0), "lines"), + plot.title = element_text(margin=margin(b = -31, unit = "pt"), face="bold", size=18), + panel.background = element_rect(fill = "white", + colour = "white")) + + + p2 = p2 + annotation_custom(grob = rectGrob(gp=gpar(fill="red", alpha=0.5)), + xmin = -0.33, xmax = -0.08, ymin = 1-p.a.given.b/2 -0.04, ymax = 1-p.a.given.b/2 +0.04) + + annotation_custom(grob=textGrob("P(A | B)"), + xmin = -0.3, xmax = -0.1, ymin = 1-p.a.given.b, ymax = 1) + + annotation_custom(grob = linesGrob(arrow=arrow(type="open", ends="both", length=unit(2,"mm")), + gp=gpar(col="black", lwd=1.5)), + xmin = -0.05, xmax = -0.05, ymin = 1-p.a.given.b, ymax = 1) + # add box to highlight the unknown quantity P(A|B) + + p2 = p2 + annotation_custom(grob = textGrob("P(¬A | B)"), + xmin = -0.2, xmax = -0.2, ymin = 0, ymax = 1-p.a.given.b) + + annotation_custom(grob = linesGrob(arrow=arrow(type="open", ends="both", length=unit(2,"mm")), + gp=gpar(col="black", lwd=1.5)), + xmin = -0.05, xmax = -0.05, ymin = 0, ymax = 1-p.a.given.b) + + p2 = p2 + annotation_custom(grob = textGrob("P(B)"), + xmin = 0, xmax = p.b, ymin = 1.1, ymax = 1.1) + + annotation_custom(grob = linesGrob(arrow=arrow(type="open", ends="both", length=unit(2,"mm")), + gp=gpar(col="black", lwd=1.5)), + xmin = 0, xmax = p.b, ymin = 1.05, ymax=1.05) + + p2 = p2 + annotation_custom(grob = textGrob("P(¬B)"), + xmin = p.b, xmax = 1, ymin = 1.1, ymax = 1.1) + + annotation_custom(grob = linesGrob(arrow=arrow(type="open", ends="both", length=unit(2,"mm")), + gp=gpar(col="black", lwd=1.5)), + xmin = p.b, xmax = 1, ymin = 1.05, ymax=1.05) + + + p2 = p2 + annotation_custom(grob = textGrob("P(A&B)"), + xmin = 0, xmax = p.b, ymin = 1-p.a.given.b, ymax = 1) + + + bayes_viz = grid.arrange(p1, p2, ncol=2) + + bayes_viz + + }) +} + +# Run the application +shinyApp(ui = ui, server = server) diff --git a/docs/04_testing.html b/docs/04_testing.html index f7a54bc..6ab0a14 100644 --- a/docs/04_testing.html +++ b/docs/04_testing.html @@ -252,7 +252,7 @@

통계 검정

+
분류
전체 (5)
2-비율 z-검정 (1)
t-점수 (1)
z-점수 (1)
단일 비율 t-검정 (1)
단일 비율 z-검정 (1)
단일 평균 t-검정 (1)
단일 평균 z-검정 (1)
단일 표본 t-검정 (1)
대응 표본 t-검정 (1)
독립 표본 t-검정 (1)
비율 검정 (2)
카이제곱-점수 (1)
평균 검정 (2)
확률계산 (1)
@@ -291,7 +291,7 @@
분류
-
-
+
-
+
-
+ +
+ + + +
+ +
+

+ +

+
+
+x-점수 확률계산 +
+
+
+z-점수 +
+
+t-점수 +
+
+카이제곱-점수 +
+
+확률계산 +
+
+
+“z-점수”, “t-점수”, “카이제곱-점수”, 확률계산은 가설 검정과 신뢰구간 계산에서 유용합니다. 많은 통계적 가설 검정에서 z-점수를 사용하여 유의 확률(p-value)을 계산하며, 특정 z-점수에 해당하는 누적 확률을 빠르게 찾을 수 있습니다. 또한, 정규분포를 따르는 데이터의 신뢰구간 계산 시… +
+
+
+이광춘 +
+
+2024-05-15
diff --git a/docs/04_testing/x_score/index.html b/docs/04_testing/x_score/index.html new file mode 100644 index 0000000..9ddd1b9 --- /dev/null +++ b/docs/04_testing/x_score/index.html @@ -0,0 +1,2245 @@ + + + + + + + + + + + +빗스탯2 - x-점수 확률계산 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+
+
+

x-점수 확률계산

+
+
z-점수
+
t-점수
+
카이제곱-점수
+
확률계산
+
+
+
+ + +
+ +
+
저자
+
+

이광춘

+
+
+ +
+
공개
+
+

2024-05-15

+
+
+ + +
+ + +
+ + + + +
+ + + + + + + + + +

“z-점수”, “t-점수”, “카이제곱-점수”, 확률계산은 가설 검정과 신뢰구간 계산에서 유용합니다. 많은 통계적 가설 검정에서 z-점수를 사용하여 유의 확률(p-value)을 계산하며, 특정 z-점수에 해당하는 누적 확률을 빠르게 찾을 수 있습니다. 또한, 정규분포를 따르는 데이터의 신뢰구간 계산 시, z-점수를 사용하여 신뢰구간의 상한과 하한을 결정할 수 있습니다. t-점수는 주로 표본 크기가 작거나 모집단의 표준편차를 모를 때 사용되며, t-검정을 통해 평균 간의 차이를 검정하거나 신뢰구간을 계산하는 데 사용됩니다. 카이제곱-점수는 주로 범주형 데이터의 독립성 검정이나 적합도 검정에서 사용되며, 관측된 빈도와 기대 빈도 간의 차이를 평가하는 데 유용합니다.

+
+

1 Shiny 앱

+
+
#| label: shinylive-testing-score
+#| viewerWidth: 800
+#| viewerHeight: 700
+#| standalone: true
+
+library(shiny)
+library(showtext)
+showtext_auto()  
+
+# Define UI for application that draws a histogram
+ui <- fluidPage(
+  # Application title
+  titlePanel("통계 검정에 중요한 점수의 변화에 따른 확률 계산"),
+
+  tags$div(HTML("<script type='text/x-mathjax-config' >
+            MathJax.Hub.Config({
+            tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']]}
+            });
+            </script >
+            ")),
+
+  # Sidebar with inputs and options
+  sidebarLayout(
+    sidebarPanel(
+      radioButtons("scoreType", "점수 선택:",
+                   c("Z-score" = "z",
+                     "T-score" = "t",
+                     "Chi-square 점수" = "chisq")),
+      conditionalPanel(
+        condition = "input.scoreType == 'z'",
+        sliderInput("z", "Z-score", min = -5, max = 5, step = 0.01, ticks = TRUE, value = 1.96)
+      ),
+      conditionalPanel(
+        condition = "input.scoreType == 't'",
+        sliderInput("t", "T-score", min = -5, max = 5, step = 0.01, ticks = TRUE, value = 1.96),
+        numericInput("df_t", "자유도", value = 10, min = 1, step = 1)
+      ),
+      conditionalPanel(
+        condition = "input.scoreType == 'chisq'",
+        sliderInput("chisq", "Chi-square 점수", min = 0, max = 20, step = 0.01, ticks = TRUE, value = 3.84),
+        numericInput("df_chisq", "자유도", value = 1, min = 1, step = 1)
+      ),
+      withMathJax(),
+      p("$P(X \\leq x) =$"),
+      textOutput("prob"),
+      hr(),
+      p("신뢰수준 (양측):"),
+      textOutput("conf_level"),
+      p("대응하는 점수:"),
+      textOutput("score")
+    ),
+
+    # Show a plot of the generated distribution
+    mainPanel(
+      plotOutput("plot")
+    )
+  )
+)
+
+# Define server logic required to draw a histogram
+server <- function(input, output) {
+
+  score <- reactive({
+    switch(input$scoreType,
+           "z" = input$z,
+           "t" = input$t,
+           "chisq" = input$chisq)
+  })
+
+  output$prob <- renderPrint({
+    switch(input$scoreType,
+           "z" = pnorm(score()),
+           "t" = pt(score(), df = input$df_t),
+           "chisq" = pchisq(score(), df = input$df_chisq))
+  })
+
+  library(ggplot2)
+  library(gridExtra)
+
+  # manually save colors
+  col1 <- "#3B429F"
+  col2 <- "#76BED0"
+  col3 <- "#F55D3E"
+
+  output$plot <- renderPlot({
+    # useful "shader" function taken from: https://t-redactyl.io/blog/2016/03/creating-plots-in-r-using-ggplot2-part-9-function-plots.html
+    funcShaded <- function(x) {
+      y <- switch(input$scoreType,
+                  "z" = dnorm(x),
+                  "t" = dt(x, df = input$df_t),
+                  "chisq" = dchisq(x, df = input$df_chisq))
+      y[x > score()] <- NA
+      return(y)
+    }
+
+    p1 <- ggplot(data.frame(x = c(ifelse(input$scoreType == "chisq", 0, -20), 20)), aes(x = x)) +
+      stat_function(fun=funcShaded, geom="area", fill=col2, alpha=0.6) +
+      stat_function(fun = switch(input$scoreType,
+                                 "z" = dnorm,
+                                 "t" = function(x) dt(x, df = input$df_t),
+                                 "chisq" = function(x) dchisq(x, df = input$df_chisq)),
+                    color=col1, size = 1.4) +
+      ggtitle(switch(input$scoreType,
+                     "z" = "표준정규분포 확률 밀도 함수",
+                     "t" = "t 분포 확률 밀도 함수",
+                     "chisq" = "카이제곱 분포 확률 밀도 함수")) +
+      labs(x="", y="") +
+      theme_bw() +
+      scale_x_continuous(limits = c(ifelse(input$scoreType == "chisq", 0, -5),
+                                    ifelse(input$scoreType == "chisq", max(20, score() + 2), 5)),
+                         expand = c(0, 0)) +
+      scale_y_continuous(limits = c(0, 0.5), expand = c(0, 0)) +
+      geom_vline(xintercept=score(), lty=2, size=1.2, color=col3) +
+      annotate("text", x=ifelse(score()<0 | input$scoreType == "chisq", score() + 0.4, score() - 0.4),
+               y=funcShaded(score()) + 0.05, label=toupper(input$scoreType),
+               parse=TRUE, size=5, color=col3) +
+      theme(axis.line = element_line(size=1, colour = "black"),
+            panel.grid.major = element_blank(),
+            panel.grid.minor = element_blank(),
+            panel.border = element_blank(),
+            panel.background = element_blank(),
+            plot.title = element_text(size = 20, family = "Tahoma", face = "bold"),
+            text=element_text(family="Tahoma"),
+            axis.text.x=element_text(colour="black", size = 11),
+            axis.text.y=element_text(colour="black", size = 11))
+
+    p2 <- ggplot(data.frame(x = c(ifelse(input$scoreType == "chisq", 0, -20), 20)), aes(x = x)) +
+      annotate("segment", x=score(), xend=score(),
+               y=0, yend=switch(input$scoreType,
+                                "z" = pnorm(score()),
+                                "t" = pt(score(), df = input$df_t),
+                                "chisq" = pchisq(score(), df = input$df_chisq)),
+               color=col3, lty=2, size=1.4) +
+      annotate("segment", x=ifelse(input$scoreType == "chisq", 0, -5), xend=score(),
+               y=switch(input$scoreType,
+                        "z" = pnorm(score()),
+                        "t" = pt(score(), df = input$df_t),
+                        "chisq" = pchisq(score(), df = input$df_chisq)),
+               yend=switch(input$scoreType,
+                           "z" = pnorm(score()),
+                           "t" = pt(score(), df = input$df_t),
+                           "chisq" = pchisq(score(), df = input$df_chisq)),
+               color=col2, lty=2, size=1.4) +
+      stat_function(fun = switch(input$scoreType,
+                                 "z" = pnorm,
+                                 "t" = function(x) pt(x, df = input$df_t),
+                                 "chisq" = function(x) pchisq(x, df = input$df_chisq)),
+                    color=col1, size = 1.4) +
+      ggtitle(switch(input$scoreType,
+                     "z" = "표준정규분포 누적 분포 함수",
+                     "t" = "t 분포 누적 분포 함수",
+                     "chisq" = "카이제곱 분포 누적 분포 함수")) +
+      labs(x="", y="") +
+      theme_bw() +
+      scale_x_continuous(limits = c(ifelse(input$scoreType == "chisq", 0, -5),
+                                    ifelse(input$scoreType == "chisq", max(20, score() + 2), 5)),
+                         expand = c(0, 0)) +
+      scale_y_continuous(limits = c(0, 1.14), breaks=c(0, 0.2, 0.4, 0.6, 0.8, 1), expand = c(0, 0)) +
+      annotate("text", x=score() + 0.4,
+               y=switch(input$scoreType,
+                        "z" = pnorm(score()),
+                        "t" = pt(score(), df = input$df_t),
+                        "chisq" = pchisq(score(), df = input$df_chisq)) - 0.1,
+               label=toupper(input$scoreType),
+               parse=TRUE, size=5, color=col3) +
+      annotate("text", x=ifelse(input$scoreType == "chisq", 1, -3),
+               y=(switch(input$scoreType,
+                         "z" = pnorm(score()),
+                         "t" = pt(score(), df = input$df_t),
+                         "chisq" = pchisq(score(), df = input$df_chisq)) + 0.05),
+               label=("'P(X' <= x ~ ')'"),
+               parse=TRUE, size=5, color=col2) +
+      theme(axis.line = element_line(size=1, colour = "black"),
+            panel.grid.major = element_blank(),
+            panel.grid.minor = element_blank(),
+            panel.border = element_blank(),
+            panel.background = element_blank(),
+            plot.title = element_text(size = 20, family = "Tahoma", face = "bold"),
+            text=element_text(family="Tahoma"),
+            axis.text.x=element_text(colour="black", size = 11),
+            axis.text.y=element_text(colour="black", size = 11))
+
+    grid.arrange(p1, p2, nrow=1)
+  })
+
+  # 신뢰수준 계산
+  output$conf_level <- renderText({
+    conf_level <- round((1 - 2 * (1 - switch(input$scoreType,
+                                             "z" = pnorm(abs(score())),
+                                             "t" = pt(abs(score()), df = input$df_t),
+                                             "chisq" = pchisq(score(), df = input$df_chisq)))) * 100, 2)
+    paste0(conf_level, "%")
+  })
+
+  # 양측 검정에서의 점수 계산
+  output$score <- renderText({
+    conf_level <- (1 - 2 * (1 - switch(input$scoreType,
+                                       "z" = pnorm(abs(score())),
+                                       "t" = pt(abs(score()), df = input$df_t),
+                                       "chisq" = pchisq(score(), df = input$df_chisq))))
+    score_val <- round(switch(input$scoreType,
+                              "z" = qnorm(1 - (1 - conf_level) / 2),
+                              "t" = qt(1 - (1 - conf_level) / 2, df = input$df_t),
+                              "chisq" = qchisq(conf_level, df = input$df_chisq)), 2)
+    paste0(score_val)
+  })
+}
+
+# Run the application
+shinyApp(ui = ui, server = server)
+
+
+
+
+

2 코딩

+
+ + + +
+ +

라이센스

CC BY-SA-NC & GPL-3
+ +
+ + + + + + \ No newline at end of file diff --git a/docs/04_testing/x_score/thumbnail.png b/docs/04_testing/x_score/thumbnail.png new file mode 100644 index 0000000..947eafb Binary files /dev/null and b/docs/04_testing/x_score/thumbnail.png differ diff --git a/docs/listings.json b/docs/listings.json index a77c1e1..656e22f 100644 --- a/docs/listings.json +++ b/docs/listings.json @@ -15,6 +15,7 @@ "/04_testing/one_prop/index.html", "/04_testing/two_means/index.html", "/04_testing/two_prop/index.html", + "/04_testing/x_score/index.html", "/05_infer/ci/index.html", "/05_infer/clt/index.html", "/06_theory/lln/index.html" @@ -63,7 +64,8 @@ "/04_testing/one_mean/index.html", "/04_testing/one_prop/index.html", "/04_testing/two_means/index.html", - "/04_testing/two_prop/index.html" + "/04_testing/two_prop/index.html", + "/04_testing/x_score/index.html" ] }, { diff --git a/docs/search.json b/docs/search.json index 9fd53df..cde16da 100644 --- a/docs/search.json +++ b/docs/search.json @@ -102,7 +102,7 @@ "href": "04_testing.html", "title": "통계 검정", "section": "", - "text": "정렬\n 디폴트\n \n 제목\n \n \n 날짜 - 날짜(오름차순)\n \n \n 날짜 - 날짜(내림차순)\n \n \n 저자\n \n \n \n \n \n \n \n\n\n\n\n\n\n\n\n\n\n단일 평균 가설 검정\n\n\n\n평균 검정\n\n\n단일 평균 z-검정\n\n\n단일 평균 t-검정\n\n\n\n\n\n\n\n이광춘\n\n\n2024-05-13\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n단일 비율 가설 검정\n\n\n\n비율 검정\n\n\n단일 비율 z-검정\n\n\n단일 비율 t-검정\n\n\n\n\n\n\n\n이광춘\n\n\n2024-05-13\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n이표본 평균차이 검정\n\n\n\n평균 검정\n\n\n단일 표본 t-검정\n\n\n대응 표본 t-검정\n\n\n독립 표본 t-검정\n\n\n\n\n\n\n\n이광춘\n\n\n2024-05-13\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n2-비율 가설 검정\n\n\n\n비율 검정\n\n\n2-비율 z-검정\n\n\n\n\n\n\n\n이광춘\n\n\n2024-05-13\n\n\n\n\n\n\n\n\n일치 없음\n\n라이센스CC BY-SA-NC & GPL-3" + "text": "정렬\n 디폴트\n \n 제목\n \n \n 날짜 - 날짜(오름차순)\n \n \n 날짜 - 날짜(내림차순)\n \n \n 저자\n \n \n \n \n \n \n \n\n\n\n\n\n\n\n\n\n\n단일 평균 가설 검정\n\n\n\n평균 검정\n\n\n단일 평균 z-검정\n\n\n단일 평균 t-검정\n\n\n\n\n\n\n\n이광춘\n\n\n2024-05-15\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n단일 비율 가설 검정\n\n\n\n비율 검정\n\n\n단일 비율 z-검정\n\n\n단일 비율 t-검정\n\n\n\n\n\n\n\n이광춘\n\n\n2024-05-15\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n이표본 평균차이 검정\n\n\n\n평균 검정\n\n\n단일 표본 t-검정\n\n\n대응 표본 t-검정\n\n\n독립 표본 t-검정\n\n\n\n\n\n\n\n이광춘\n\n\n2024-05-15\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n2-비율 가설 검정\n\n\n\n비율 검정\n\n\n2-비율 z-검정\n\n\n\n\n\n\n\n이광춘\n\n\n2024-05-15\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nx-점수 확률계산\n\n\n\nz-점수\n\n\nt-점수\n\n\n카이제곱-점수\n\n\n확률계산\n\n\n\n\n\n\n\n이광춘\n\n\n2024-05-15\n\n\n\n\n\n\n\n\n일치 없음\n\n라이센스CC BY-SA-NC & GPL-3" }, { "objectID": "BitStat.html", @@ -271,5 +271,12 @@ "title": "신뢰구간", "section": "", "text": "신뢰구간은 통계학에서 모수 추정에 사용되는 중요한 개념입니다. 신뢰구간은 표본에서 계산된 추정량이 실제 모수를 포함할 확률을 나타내는 구간을 의미합니다. 신뢰구간은 모수 추정의 불확실성을 고려하여 모수 추정에 대한 더 정확한 정보를 제공합니다. 대표적으로 모집단 평균과 모집단 비율에 대한 신뢰구간을 계산하는 방법을 시각적으로 확인할 수 있습니다.\n\n1 Shiny 앱\n\n#| label: shinylive-infer-ci\n#| viewerWidth: 800\n#| viewerHeight: 600\n#| standalone: true\n\nlibrary(shiny)\nlibrary(ggplot2)\nlibrary(tidyverse)\nlibrary(showtext)\nshowtext_auto() \n\nui <- fluidPage(\n title = \"신뢰구간 모의실험\",\n\n fluidRow(\n column(3,\n h2('신뢰구간'),\n hr(),\n radioButtons(\"case\", \"평균과 비율 추정 선택:\",\n choices = c(\"모집단 평균\", \"모집단 비율\"),\n inline = TRUE,\n selected = \"모집단 평균\"),\n hr(),\n conditionalPanel(\n condition = \"input.case == '모집단 비율'\",\n sliderInput(\"pop_prop\",\n \"모집단 비율:\",\n min = 0,\n max = 1,\n step = 0.05,\n value = 0.4)\n ),\n conditionalPanel(\n condition = \"input.case == '모집단 평균'\",\n numericInput(\"pop_mean\", \"모집단 평균:\", value = 10),\n numericInput(\"pop_sd\", \"모집단 표준편차:\", value = 5, min = 0)\n ),\n hr(),\n sliderInput(\"sample_size\",\n \"표본 크기:\",\n min = 10,\n max = 1000,\n step = 10,\n value = 100),\n sliderInput(\"n_samples\",\n \"표본 수:\",\n min = 0,\n max = 500,\n step = 10,\n value = 10),\n sliderInput(\"conf_level\",\n \"신뢰 수준:\",\n min = 5,\n max = 99,\n step = 1,\n value = 95)\n ),\n column(8,\n mainPanel(\n plotOutput('IntervalPlot', height = '500px'),\n verbatimTextOutput(\"summary\"),\n verbatimTextOutput(\"ci\")\n )\n )\n )\n)\n\nserver <- function(input, output) {\n\n output$IntervalPlot <- renderPlot({\n n_samples = input$n_samples\n sample_size = input$sample_size\n alpha = 1 - (input$conf_level / 100)\n\n if (input$case == \"모집단 비율\") {\n n_population <- 100000\n population <- rbernoulli(n_population, p = input$pop_prop)\n true_value <- mean(population)\n\n sample_stats <- vector('numeric', n_samples)\n upper_cis <- vector('numeric', n_samples)\n lower_cis <- vector('numeric', n_samples)\n contains_true_value <- vector('numeric', n_samples)\n for (i in 1:n_samples) {\n sample <- sample(population, sample_size, replace = FALSE)\n sample_stat <- mean(sample)\n sample_std <- (sqrt(sample_stat * (1 - sample_stat)) / sqrt(sample_size))\n z_value <- qnorm(alpha/2, lower.tail = FALSE)\n margin_of_error <- z_value * sample_std\n\n sample_stats[i] <- sample_stat\n upper_cis[i] <- sample_stat + margin_of_error\n lower_cis[i] <- sample_stat - margin_of_error\n contains_true_value[i] <- ifelse(true_value >= lower_cis[i] & true_value <= upper_cis[i], 1, 0)\n }\n\n title_text <- paste(input$conf_level, \"% 신뢰구간 (모집단 비율)\", sep = \" \")\n y_label <- \"표본 비율\"\n\n } else if (input$case == \"모집단 평균\") {\n n_population <- 100000\n population <- rnorm(n_population, mean = input$pop_mean, sd = input$pop_sd)\n true_value <- mean(population)\n\n sample_stats <- vector('numeric', n_samples)\n upper_cis <- vector('numeric', n_samples)\n lower_cis <- vector('numeric', n_samples)\n contains_true_value <- vector('numeric', n_samples)\n for (i in 1:n_samples) {\n sample <- sample(population, sample_size, replace = FALSE)\n sample_stat <- mean(sample)\n sample_std <- sd(sample) / sqrt(sample_size)\n t_value <- qt(alpha/2, df = sample_size - 1, lower.tail = FALSE)\n margin_of_error <- t_value * sample_std\n\n sample_stats[i] <- sample_stat\n upper_cis[i] <- sample_stat + margin_of_error\n lower_cis[i] <- sample_stat - margin_of_error\n contains_true_value[i] <- ifelse(true_value >= lower_cis[i] & true_value <= upper_cis[i], 1, 0)\n }\n\n title_text <- paste(input$conf_level, \"% 신뢰구간 (모집단 평균)\", sep = \" \")\n y_label <- \"표본 평균\"\n }\n\n subtitle_text <- paste0('관측된 포함률: ',\n round(mean(contains_true_value), 2) * 100, '%')\n\n contains_true_value_f <- ifelse(contains_true_value==1, \"포함\", \"미포함\")\n contains_true_value_f <- factor(contains_true_value_f)\n\n p <- tibble(\n sample_stat = sample_stats,\n upper_ci = upper_cis,\n lower_ci = lower_cis) %>%\n mutate(sample_number = as.factor(row_number())) %>%\n ggplot(aes(x = sample_number, y = sample_stat)) +\n coord_flip() +\n ggtitle(title_text, subtitle = subtitle_text) +\n ylab(y_label) +\n xlab(\"\") +\n theme_minimal() +\n theme(axis.text.y = element_blank(),\n axis.ticks.y = element_blank(),\n axis.line.x = element_line(color=\"black\", size = 0.5),\n legend.position = \"none\") +\n scale_x_discrete(breaks = NULL)\n\n p <- p + geom_hline(aes(yintercept = true_value),\n linetype = 'dashed', size = 1)\n p <- p +\n geom_linerange(aes(ymin = lower_ci, ymax = upper_ci, color=contains_true_value_f)) +\n geom_point(aes(color=contains_true_value_f), size=2) +\n scale_color_manual(values=c(\"포함\" = \"#009E73\", \"미포함\" = \"#D55E00\"))\n\n\n p\n })\n\n output$summary <- renderPrint({\n n_samples = input$n_samples\n sample_size = input$sample_size\n\n if (input$case == \"모집단 비율\") {\n n_population <- 100000\n population <- rbernoulli(n_population, p = input$pop_prop)\n\n sample_stats <- vector('numeric', n_samples)\n for (i in 1:n_samples) {\n sample <- sample(population, sample_size, replace = FALSE)\n sample_stats[i] <- mean(sample)\n }\n\n cat(\"표본 비율의 평균(점추정):\\n\")\n print(mean(sample_stats))\n\n } else if (input$case == \"모집단 평균\") {\n n_population <- 100000\n population <- rnorm(n_population, mean = input$pop_mean, sd = input$pop_sd)\n\n sample_stats <- vector('numeric', n_samples)\n for (i in 1:n_samples) {\n sample <- sample(population, sample_size, replace = FALSE)\n sample_stats[i] <- mean(sample)\n }\n\n cat(\"표본 평균의 평균(점 추정):\\n\")\n print(mean(sample_stats))\n }\n })\n\n output$ci <- renderPrint({\n n_samples = input$n_samples\n sample_size = input$sample_size\n alpha = 1 - (input$conf_level / 100)\n\n if (input$case == \"모집단 비율\") {\n n_population <- 100000\n population <- rbernoulli(n_population, p = input$pop_prop)\n\n sample_stats <- vector('numeric', n_samples)\n for (i in 1:n_samples) {\n sample <- sample(population, sample_size, replace = FALSE)\n sample_stats[i] <- mean(sample)\n }\n\n se <- sqrt(mean(sample_stats) * (1 - mean(sample_stats)) / sample_size)\n lower_ci <- mean(sample_stats) - qnorm(1 - alpha/2) * se\n upper_ci <- mean(sample_stats) + qnorm(1 - alpha/2) * se\n\n cat(paste0(input$conf_level, \"% 신뢰구간 (모집단 비율):\\n\"))\n cat(paste0(\"[\", round(lower_ci, 2), \", \", round(upper_ci, 2), \"]\"))\n\n } else if (input$case == \"모집단 평균\") {\n n_population <- 100000\n population <- rnorm(n_population, mean = input$pop_mean, sd = input$pop_sd)\n\n sample_stats <- vector('numeric', n_samples)\n for (i in 1:n_samples) {\n sample <- sample(population, sample_size, replace = FALSE)\n sample_stats[i] <- mean(sample)\n }\n\n se <- sd(sample_stats) / sqrt(n_samples)\n lower_ci <- mean(sample_stats) - qt(1 - alpha/2, df = n_samples - 1) * se\n upper_ci <- mean(sample_stats) + qt(1 - alpha/2, df = n_samples - 1) * se\n\n cat(paste0(input$conf_level, \"% 신뢰구간 (모집단 평균):\\n\"))\n cat(paste0(\"[\", round(lower_ci, 2), \", \", round(upper_ci, 2), \"]\"))\n }\n })\n}\n\nshinyApp(ui = ui, server = server)\n\n\n\n2 코딩\n\nPlease enable JavaScript to experience the dynamic code cell content on this page.\n\n\n\n\n라이센스CC BY-SA-NC & GPL-3" + }, + { + "objectID": "04_testing/x_score/index.html", + "href": "04_testing/x_score/index.html", + "title": "x-점수 확률계산", + "section": "", + "text": "“z-점수”, “t-점수”, “카이제곱-점수”, 확률계산은 가설 검정과 신뢰구간 계산에서 유용합니다. 많은 통계적 가설 검정에서 z-점수를 사용하여 유의 확률(p-value)을 계산하며, 특정 z-점수에 해당하는 누적 확률을 빠르게 찾을 수 있습니다. 또한, 정규분포를 따르는 데이터의 신뢰구간 계산 시, z-점수를 사용하여 신뢰구간의 상한과 하한을 결정할 수 있습니다. t-점수는 주로 표본 크기가 작거나 모집단의 표준편차를 모를 때 사용되며, t-검정을 통해 평균 간의 차이를 검정하거나 신뢰구간을 계산하는 데 사용됩니다. 카이제곱-점수는 주로 범주형 데이터의 독립성 검정이나 적합도 검정에서 사용되며, 관측된 빈도와 기대 빈도 간의 차이를 평가하는 데 유용합니다.\n\n1 Shiny 앱\n\n#| label: shinylive-testing-score\n#| viewerWidth: 800\n#| viewerHeight: 700\n#| standalone: true\n\nlibrary(shiny)\nlibrary(showtext)\nshowtext_auto() \n\n# Define UI for application that draws a histogram\nui <- fluidPage(\n # Application title\n titlePanel(\"통계 검정에 중요한 점수의 변화에 따른 확률 계산\"),\n\n tags$div(HTML(\"<script type='text/x-mathjax-config' >\n MathJax.Hub.Config({\n tex2jax: {inlineMath: [['$','$'], ['\\\\(','\\\\)']]}\n });\n </script >\n \")),\n\n # Sidebar with inputs and options\n sidebarLayout(\n sidebarPanel(\n radioButtons(\"scoreType\", \"점수 선택:\",\n c(\"Z-score\" = \"z\",\n \"T-score\" = \"t\",\n \"Chi-square 점수\" = \"chisq\")),\n conditionalPanel(\n condition = \"input.scoreType == 'z'\",\n sliderInput(\"z\", \"Z-score\", min = -5, max = 5, step = 0.01, ticks = TRUE, value = 1.96)\n ),\n conditionalPanel(\n condition = \"input.scoreType == 't'\",\n sliderInput(\"t\", \"T-score\", min = -5, max = 5, step = 0.01, ticks = TRUE, value = 1.96),\n numericInput(\"df_t\", \"자유도\", value = 10, min = 1, step = 1)\n ),\n conditionalPanel(\n condition = \"input.scoreType == 'chisq'\",\n sliderInput(\"chisq\", \"Chi-square 점수\", min = 0, max = 20, step = 0.01, ticks = TRUE, value = 3.84),\n numericInput(\"df_chisq\", \"자유도\", value = 1, min = 1, step = 1)\n ),\n withMathJax(),\n p(\"$P(X \\\\leq x) =$\"),\n textOutput(\"prob\"),\n hr(),\n p(\"신뢰수준 (양측):\"),\n textOutput(\"conf_level\"),\n p(\"대응하는 점수:\"),\n textOutput(\"score\")\n ),\n\n # Show a plot of the generated distribution\n mainPanel(\n plotOutput(\"plot\")\n )\n )\n)\n\n# Define server logic required to draw a histogram\nserver <- function(input, output) {\n\n score <- reactive({\n switch(input$scoreType,\n \"z\" = input$z,\n \"t\" = input$t,\n \"chisq\" = input$chisq)\n })\n\n output$prob <- renderPrint({\n switch(input$scoreType,\n \"z\" = pnorm(score()),\n \"t\" = pt(score(), df = input$df_t),\n \"chisq\" = pchisq(score(), df = input$df_chisq))\n })\n\n library(ggplot2)\n library(gridExtra)\n\n # manually save colors\n col1 <- \"#3B429F\"\n col2 <- \"#76BED0\"\n col3 <- \"#F55D3E\"\n\n output$plot <- renderPlot({\n # useful \"shader\" function taken from: https://t-redactyl.io/blog/2016/03/creating-plots-in-r-using-ggplot2-part-9-function-plots.html\n funcShaded <- function(x) {\n y <- switch(input$scoreType,\n \"z\" = dnorm(x),\n \"t\" = dt(x, df = input$df_t),\n \"chisq\" = dchisq(x, df = input$df_chisq))\n y[x > score()] <- NA\n return(y)\n }\n\n p1 <- ggplot(data.frame(x = c(ifelse(input$scoreType == \"chisq\", 0, -20), 20)), aes(x = x)) +\n stat_function(fun=funcShaded, geom=\"area\", fill=col2, alpha=0.6) +\n stat_function(fun = switch(input$scoreType,\n \"z\" = dnorm,\n \"t\" = function(x) dt(x, df = input$df_t),\n \"chisq\" = function(x) dchisq(x, df = input$df_chisq)),\n color=col1, size = 1.4) +\n ggtitle(switch(input$scoreType,\n \"z\" = \"표준정규분포 확률 밀도 함수\",\n \"t\" = \"t 분포 확률 밀도 함수\",\n \"chisq\" = \"카이제곱 분포 확률 밀도 함수\")) +\n labs(x=\"\", y=\"\") +\n theme_bw() +\n scale_x_continuous(limits = c(ifelse(input$scoreType == \"chisq\", 0, -5),\n ifelse(input$scoreType == \"chisq\", max(20, score() + 2), 5)),\n expand = c(0, 0)) +\n scale_y_continuous(limits = c(0, 0.5), expand = c(0, 0)) +\n geom_vline(xintercept=score(), lty=2, size=1.2, color=col3) +\n annotate(\"text\", x=ifelse(score()<0 | input$scoreType == \"chisq\", score() + 0.4, score() - 0.4),\n y=funcShaded(score()) + 0.05, label=toupper(input$scoreType),\n parse=TRUE, size=5, color=col3) +\n theme(axis.line = element_line(size=1, colour = \"black\"),\n panel.grid.major = element_blank(),\n panel.grid.minor = element_blank(),\n panel.border = element_blank(),\n panel.background = element_blank(),\n plot.title = element_text(size = 20, family = \"Tahoma\", face = \"bold\"),\n text=element_text(family=\"Tahoma\"),\n axis.text.x=element_text(colour=\"black\", size = 11),\n axis.text.y=element_text(colour=\"black\", size = 11))\n\n p2 <- ggplot(data.frame(x = c(ifelse(input$scoreType == \"chisq\", 0, -20), 20)), aes(x = x)) +\n annotate(\"segment\", x=score(), xend=score(),\n y=0, yend=switch(input$scoreType,\n \"z\" = pnorm(score()),\n \"t\" = pt(score(), df = input$df_t),\n \"chisq\" = pchisq(score(), df = input$df_chisq)),\n color=col3, lty=2, size=1.4) +\n annotate(\"segment\", x=ifelse(input$scoreType == \"chisq\", 0, -5), xend=score(),\n y=switch(input$scoreType,\n \"z\" = pnorm(score()),\n \"t\" = pt(score(), df = input$df_t),\n \"chisq\" = pchisq(score(), df = input$df_chisq)),\n yend=switch(input$scoreType,\n \"z\" = pnorm(score()),\n \"t\" = pt(score(), df = input$df_t),\n \"chisq\" = pchisq(score(), df = input$df_chisq)),\n color=col2, lty=2, size=1.4) +\n stat_function(fun = switch(input$scoreType,\n \"z\" = pnorm,\n \"t\" = function(x) pt(x, df = input$df_t),\n \"chisq\" = function(x) pchisq(x, df = input$df_chisq)),\n color=col1, size = 1.4) +\n ggtitle(switch(input$scoreType,\n \"z\" = \"표준정규분포 누적 분포 함수\",\n \"t\" = \"t 분포 누적 분포 함수\",\n \"chisq\" = \"카이제곱 분포 누적 분포 함수\")) +\n labs(x=\"\", y=\"\") +\n theme_bw() +\n scale_x_continuous(limits = c(ifelse(input$scoreType == \"chisq\", 0, -5),\n ifelse(input$scoreType == \"chisq\", max(20, score() + 2), 5)),\n expand = c(0, 0)) +\n scale_y_continuous(limits = c(0, 1.14), breaks=c(0, 0.2, 0.4, 0.6, 0.8, 1), expand = c(0, 0)) +\n annotate(\"text\", x=score() + 0.4,\n y=switch(input$scoreType,\n \"z\" = pnorm(score()),\n \"t\" = pt(score(), df = input$df_t),\n \"chisq\" = pchisq(score(), df = input$df_chisq)) - 0.1,\n label=toupper(input$scoreType),\n parse=TRUE, size=5, color=col3) +\n annotate(\"text\", x=ifelse(input$scoreType == \"chisq\", 1, -3),\n y=(switch(input$scoreType,\n \"z\" = pnorm(score()),\n \"t\" = pt(score(), df = input$df_t),\n \"chisq\" = pchisq(score(), df = input$df_chisq)) + 0.05),\n label=(\"'P(X' <= x ~ ')'\"),\n parse=TRUE, size=5, color=col2) +\n theme(axis.line = element_line(size=1, colour = \"black\"),\n panel.grid.major = element_blank(),\n panel.grid.minor = element_blank(),\n panel.border = element_blank(),\n panel.background = element_blank(),\n plot.title = element_text(size = 20, family = \"Tahoma\", face = \"bold\"),\n text=element_text(family=\"Tahoma\"),\n axis.text.x=element_text(colour=\"black\", size = 11),\n axis.text.y=element_text(colour=\"black\", size = 11))\n\n grid.arrange(p1, p2, nrow=1)\n })\n\n # 신뢰수준 계산\n output$conf_level <- renderText({\n conf_level <- round((1 - 2 * (1 - switch(input$scoreType,\n \"z\" = pnorm(abs(score())),\n \"t\" = pt(abs(score()), df = input$df_t),\n \"chisq\" = pchisq(score(), df = input$df_chisq)))) * 100, 2)\n paste0(conf_level, \"%\")\n })\n\n # 양측 검정에서의 점수 계산\n output$score <- renderText({\n conf_level <- (1 - 2 * (1 - switch(input$scoreType,\n \"z\" = pnorm(abs(score())),\n \"t\" = pt(abs(score()), df = input$df_t),\n \"chisq\" = pchisq(score(), df = input$df_chisq))))\n score_val <- round(switch(input$scoreType,\n \"z\" = qnorm(1 - (1 - conf_level) / 2),\n \"t\" = qt(1 - (1 - conf_level) / 2, df = input$df_t),\n \"chisq\" = qchisq(conf_level, df = input$df_chisq)), 2)\n paste0(score_val)\n })\n}\n\n# Run the application\nshinyApp(ui = ui, server = server)\n\n\n\n\n2 코딩\n\nPlease enable JavaScript to experience the dynamic code cell content on this page.\n\n\n\n\n라이센스CC BY-SA-NC & GPL-3" } ] \ No newline at end of file diff --git a/docs/sitemap.xml b/docs/sitemap.xml index d992d97..2c47283 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -116,4 +116,8 @@ https://r2bit.com/BitStat2/05_infer/ci/index.html 2024-05-15T07:23:21.132Z + + https://r2bit.com/BitStat2/04_testing/x_score/index.html + 2024-05-15T12:47:18.640Z + diff --git a/study.csv b/study.csv new file mode 100644 index 0000000..bee94c6 --- /dev/null +++ b/study.csv @@ -0,0 +1,101 @@ +party,support +A,1 +A,1 +A,0 +A,1 +A,1 +A,0 +A,1 +A,1 +A,1 +A,0 +B,1 +B,0 +B,1 +B,0 +B,0 +B,1 +B,1 +B,0 +B,1 +B,0 +C,1 +C,1 +C,1 +C,0 +C,1 +C,1 +C,1 +C,0 +C,1 +C,1 +D,0 +D,0 +D,1 +D,0 +D,1 +D,0 +D,0 +D,1 +D,0 +D,0 +A,1 +A,1 +A,1 +A,0 +A,1 +B,0 +B,1 +B,0 +B,1 +B,0 +C,1 +C,1 +C,0 +C,1 +C,1 +D,0 +D,0 +D,0 +D,1 +D,0 +A,1 +A,0 +A,1 +A,1 +A,1 +B,1 +B,0 +B,1 +B,0 +B,1 +C,1 +C,1 +C,1 +C,0 +C,1 +D,0 +D,1 +D,0 +D,0 +D,0 +A,1 +A,1 +A,0 +A,1 +A,1 +B,0 +B,1 +B,1 +B,0 +B,1 +C,1 +C,0 +C,1 +C,1 +C,1 +D,0 +D,0 +D,0 +D,1 +D,0