diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index c2c5297..ecb6469 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -1,8 +1,6 @@
 # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
-  push:
-    branches: main
   pull_request:
     branches: main
 
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
index 0b5cf48..f30826c 100644
--- a/.github/workflows/pkgdown.yaml
+++ b/.github/workflows/pkgdown.yaml
@@ -3,8 +3,6 @@
 on:
   push:
     branches: main
-  pull_request:
-    branches: main
   release:
     types: [published]
   workflow_dispatch:
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
index 0317144..b31d89a 100644
--- a/.github/workflows/test-coverage.yaml
+++ b/.github/workflows/test-coverage.yaml
@@ -1,8 +1,6 @@
 # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
-  push:
-    branches: main
   pull_request:
     branches: main
 
diff --git a/DESCRIPTION b/DESCRIPTION
index a0e8ec9..c02d9a4 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,10 +1,12 @@
 Package: RBigKinds
-Title: What the Package Does (One Line, Title Case)
-Version: 0.0.1.9000
-Authors@R: 
-    person("First", "Last", , "first.last@example.com", role = c("aut", "cre"),
-           comment = c(ORCID = "YOUR-ORCID-ID"))
-Description: What the package does (one paragraph).
+Title: BigKinds Data Analysis Toolkit for R
+Version: 0.1.0.9000
+Authors@R: c(
+  person("Jaeseong", "Choe",, "cjssoote@gmail.com", role = c("aut", "cre"))
+  )
+Description: RBigKinds is a library for Data Analysis of BigKinds data through R.
+Author: Jaeseong Choe [aut, cre]
+Maintainer: Jaeseong Choe <cjssoote@gmail.com>
 License: MIT + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
diff --git a/NAMESPACE b/NAMESPACE
index 921bce5..e9465dd 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,8 +1,27 @@
 # Generated by roxygen2: do not edit by hand
 
-export(counter_to_dataframe)
-export(duplication_remover)
+export(DBSCAN)
+export(Kmeans)
+export(MeanShift)
+export(association)
+export(day_range)
 export(header_remover)
-export(keyword_list)
-export(keyword_parser)
-export(word_counter)
+export(keyword_dataframe)
+export(keyword_dataframe_no_duplicated)
+export(keywords_wordcloud)
+export(lda)
+export(normalize_vector)
+export(press_counter)
+export(tfidf)
+export(tfidf_vector)
+export(top_words)
+export(word_tokenizer)
+import(arules)
+import(dbscan)
+import(dplyr)
+import(ggplot2)
+import(tibble)
+import(tidymodels)
+import(tidytext)
+import(tm)
+import(wordcloud2)
diff --git a/R/RBigKinds.R b/R/RBigKinds.R
index acdd0c1..0c4e69a 100644
--- a/R/RBigKinds.R
+++ b/R/RBigKinds.R
@@ -12,12 +12,3 @@ NULL
 # devtools::use_package("rmarkdown","Suggests")
 # devtools::use_package("testthat","Suggests")
 #
-# ### Preprocessing
-# devtools::use_package("dplyr","Imports")
-# devtools::use_package("rvest","Imports")
-#
-# ### Import
-# devtools::use_package("readxl", "Imports")
-#
-# ### export
-# devtools::use_package("writexl",Imports")
\ No newline at end of file
diff --git a/R/association.R b/R/association.R
new file mode 100644
index 0000000..545f3c4
--- /dev/null
+++ b/R/association.R
@@ -0,0 +1,29 @@
+#' association
+#'
+#' Runs association-rule mining over the keywords of each article.
+#' Rules are mined with the Apriori algorithm.
+#' 
+#' @param df Original BigKinds data frame
+#' @param min_support Minimum support; also used as the confidence cut-off of returned rules
+#' @param minlen Minimum number of items in a rule
+#' @param maxlen Maximum number of items in a rule
+#'
+#' @examples
+#' association(df, min_support = 0.6, minlen = 3, maxlen = 10)
+#' @import arules
+#' @export
+association <- function(df, min_support = 0.5, minlen=2, maxlen = 10) {
+  if (is.data.frame(df)) {
+    words <- word_tokenizer(df)
+    data <- split(words$키워드, words$제목)
+    te_data <- as(data, "transactions")
+    result <- apriori(te_data, parameter = list(supp = min_support, minlen=minlen, maxlen=maxlen, target = "rules"))
+    result <- as.data.frame(inspect(result))
+    result <- result[, c("lhs", "rhs", "support", "confidence")]
+    colnames(result) <- c("lhs", "rhs", "support", "confidence")
+    result <- result[result[, "confidence"] > min_support, , drop = FALSE]
+    return(result)
+  } else {
+    stop("input type is to be have to df")
+  }
+}
\ No newline at end of file
diff --git a/R/barplot.R b/R/barplot.R
new file mode 100644
index 0000000..1485002
--- /dev/null
+++ b/R/barplot.R
@@ -0,0 +1,32 @@
+#' top_words
+#'
+#' Plots a bar chart of the most frequent keywords, optionally for one press.
+#' The number of words shown is set with `top_n`.
+#' The default is 25.
+#' 
+#' @param df Original BigKinds data frame
+#' @param press Press name to filter on; `NA` (the default) skips the filter
+#' @param top_n Number of words to plot
+#'
+#' @examples
+#' top_words(df, "경향신문", top_n=30)
+#' @import ggplot2
+#' @import dplyr
+#' @export
+top_words <- function(df, press = NA, top_n = 25) {
+  if (is.data.frame(df)) {
+    if (!is.na(press)){
+      df <- df |> filter(언론사 == press)
+    }
+    data <- keyword_dataframe(df)
+    data <- head(data[order(data$n, decreasing = TRUE), ], top_n)
+    ggplot(data, aes(reorder(키워드, n), n)) +
+      geom_bar(stat = "identity", fill = "steelblue") +
+      labs(x = "단어", y = "빈도") +
+      theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+      ggtitle("사용 단어 빈도 상위", top_n) +
+      coord_flip()
+  } else {
+    stop("input type is to be have to DataFrame")
+  }
+}
\ No newline at end of file
diff --git a/R/clustering.R b/R/clustering.R
new file mode 100644
index 0000000..0b728b8
--- /dev/null
+++ b/R/clustering.R
@@ -0,0 +1,69 @@
+#' Kmeans
+#'
+#' Runs k-means clustering on a numeric matrix.
+#' 
+#' @param vec Numeric matrix to cluster, one observation per row
+#' @param k  Number of clusters to form
+#' @param random_state Seed passed to `set.seed()` before clustering
+#'
+#' @examples
+#' Kmeans(vec, k = 3)
+#' @return Integer vector with the cluster index of each row of `vec`
+#' @export
+Kmeans <- function(vec, k, random_state = 123) {
+  if (is.matrix(vec)) {
+    set.seed(random_state)
+    kmeans_model <- stats::kmeans(vec, centers = k, iter.max = 1000)
+    return(kmeans_model$cluster)
+  } else {
+    stop("input type is to be have to matrix")
+  }
+}
+
+#' DBSCAN
+#'
+#' Runs DBSCAN density-based clustering on a numeric matrix.
+#' 
+#' @param vec Numeric matrix to cluster, one observation per row
+#' @param eps Neighbourhood radius (epsilon)
+#' @param min_samples Minimum number of points (`minPts`) required for a dense region
+#' @param metric Distance measure (default "euclidean"); `dbscan::dbscan()` takes no metric, so others go through `stats::dist()`
+#'
+#' @examples
+#' DBSCAN(vec, eps = 0.5, min_samples = 3)
+#' @return Integer vector of cluster labels, one per row of `vec`
+#' @import dbscan
+#' 
+#' @export
+DBSCAN <- function(vec, eps, min_samples, metric = "euclidean") {
+  if (is.matrix(vec)) {
+    x <- if (identical(metric, "euclidean")) vec else stats::dist(vec, method = metric)
+    return(dbscan::dbscan(x, eps = eps, minPts = min_samples)$cluster)
+  } else {
+    stop("input type is to be have to matrix")
+  }
+}
+
+#' MeanShift
+#'
+#' Runs mean shift clustering on a numeric matrix.
+#' 
+#' @param vec Numeric matrix to cluster
+#' @param qt Quantile value; currently only echoed in the printed message
+#'
+#' @examples
+#' MeanShift(vec, qt = 0.5)
+#' 
+#' @export
+MeanShift <- function(vec, qt = 0.25) {
+  if (is.matrix(vec)) {
+    best_bandwidth <- density(vec)$bw
+    print(paste(qt, "기준 최적 bandwidth 값:", round(best_bandwidth, 2)))
+    # NOTE(review): `meanshift()` is not defined in the code shown here - confirm which package provides it
+    ms_model <- meanshift(vec, bandwidth = best_bandwidth)
+    print(paste("cluster 갯수:", length(unique(ms_model))))
+    return(ms_model)
+  } else {
+    stop("input type is to be have to matrix")
+  }
+}
\ No newline at end of file
diff --git a/R/day_range.R b/R/day_range.R
new file mode 100644
index 0000000..03ad958
--- /dev/null
+++ b/R/day_range.R
@@ -0,0 +1,18 @@
+#' day_range
+#'
+#' Prints the first and last dates found in the data.
+#' 
+#' @param df Original BigKinds data frame
+#'
+#' @examples
+#' day_range(df)
+#'
+#' @export
+day_range <- function(df) {
+  if (is.data.frame(df)) {
+    print(paste("first day: ", min(df$일자)))
+    print(paste("last day: ", max(df$일자)))
+  } else {
+    stop("input type is to be have to DataFrame")
+  }
+}
\ No newline at end of file
diff --git a/R/global.R b/R/global.R
index f3fd2bb..1f26caf 100644
--- a/R/global.R
+++ b/R/global.R
@@ -1,123 +1,176 @@
-#' [] 표시된 헤더 삭제
+#' header_remover
 #'
+#' Strips bracketed headers such as "[...]" from the title column.
+#' Each bracket pair is removed on its own, so text between two headers is kept.
+#' @param df Original BigKinds data frame
 #'
-#' @param infile Path to the input file
-#' @return A matrix of the infile
+#' @examples
+#' data <- header_remover(df)
+#' head(data)
 #' @export
 header_remover <- function(df) {
   if (is.data.frame(df)) {
-    ans <- gsub("\\[[^)]*\\]", "", df$`제목`)
+    ans <- gsub("\\[[^]]*\\]", "", df$`제목`)
     df$`제목` <- ans
     return(df)
-  } else if (is.list(df)) {
-    ans <- gsub("\\[[^)]*\\]", "", df)
-    return(ans)
   } else {
-    stop("input value is to be have to list or DataFrame")
+    stop("input value is to be have to DataFrame")
   }
 }
 
-#' 키워드를 list로 변환
+#' word_tokenizer
 #'
+#' Converts the data into one row per keyword token, keeping the row id and title.
+#' 
+#' @param df Original BigKinds data frame
 #'
-#' @param infile Path to the input file
-#' @return A matrix of the infile
+#' @examples
+#' data <- word_tokenizer(df)
+#' view(data)
+#' @import tidytext
+#' @import tibble
+#' @import dplyr
 #' @export
-keyword_list <- function(df) {
+word_tokenizer <- function(df) {
   if (is.data.frame(df)) {
-    return(df$`키워드`)
-  } else if (is.list(df)) {
-    return(df)
+    df |> 
+      select(`제목`,`키워드`) |> 
+      rowid_to_column() |> 
+      unnest_tokens(
+        input = "키워드",
+        output = "키워드"
+      ) -> keywords
+    return(keywords)
   } else {
     stop("input value is to be have to list or DataFrame")
   }
 }
 
-#' [] 키워드 파싱
+
+#' keyword_dataframe
 #'
+#' Converts a BigKinds data set into a tibble of keyword counts, most frequent first.
+#' 
+#' @param df Original BigKinds data frame
 #'
-#' @param infile Path to the input file
-#' @return A matrix of the infile
+#' @examples
+#' data <- keyword_dataframe(df)
+#' view(data)
+#' @import tm
+#' @import tibble
+#' @import dplyr
 #' @export
-keyword_parser <- function(text_list) {
-  if (is.list(text_list)) {
-    news_key <- list()
-    for (word in text_list) {
-      if (is.character(word)) {
-        word <- strsplit(word, ",")[[1]]
-        news_key <- c(news_key, list(word))
-      } else {
-        stop("input list is not valid format")
-      }
-    }
-    return(news_key)
+keyword_dataframe <- function(df) {
+  if (is.data.frame(df)) {
+    data <- word_tokenizer(df) 
+    data |> 
+      group_by(키워드) |> 
+      tally() |> 
+      arrange(desc(n)) |> 
+      as_tibble() -> keywords
+    return(keywords)
   } else {
-    stop("input type is to be have to list")
+    stop("input type is to be have to DataFrame")
   }
 }
 
-#' 중복 값 제거
+#' keyword_dataframe_no_duplicated
 #'
+#' Converts a BigKinds data set into a tibble of keyword counts, counting each keyword once per title.
+#' 
+#' @param df Original BigKinds data frame
 #'
-#' @param infile Path to the input file
-#' @return A matrix of the infile
+#' @examples
+#' data <- keyword_dataframe_no_duplicated(df)
+#' view(data)
+#' @import tm
+#' @import tibble
+#' @import dplyr
 #' @export
-duplication_remover <- function(news_key) {
-  if (is.list(news_key)) {
-    news_value <- list()
-    for (j in news_key) {
-      if (is.list(j)) {
-        j <- unique(j)
-        news_value <- c(news_value, list(j))
-      } else {
-        stop("input list is not valid format")
-      }
-    }
-    return(news_value)
+keyword_dataframe_no_duplicated <- function(df) {
+  if (is.data.frame(df)) {
+    data <- word_tokenizer(df) 
+    # columns 2 and 3 of the tokenizer output are the title and the keyword
+    keywords_no_duplicated <- data[!duplicated(data[,c(2,3)]),]
+    
+    keywords_no_duplicated |> 
+      group_by(키워드) |> 
+      tally() |> 
+      arrange(desc(n)) |> 
+      as_tibble() -> return_keywords
+    return(return_keywords)
   } else {
-    stop("input type is to be have to list")
+    stop("input type is to be have to DataFrame")
   }
 }
 
-#' 단어 갯수 카운트
+#' tfidf
 #'
+#' Returns keyword counts per article title together with tf, idf and tf-idf scores.
+#' 
+#' @param df Original BigKinds data frame
 #'
-#' @param infile Path to the input file
-#' @return A matrix of the infile
+#' @examples
+#' data <- tfidf(df)
+#' view(data)
+#' @import tm
+#' @import tibble
+#' @import dplyr
+#' @import tidytext
 #' @export
-word_counter <- function(news_value) {
-  if (is.list(news_value)) {
-    key_words <- list()
-    for (k in seq_along(news_value)) {
-      for (i in news_value[[k]]) {
-        if (!(i %in% names(key_words))) {
-          key_words[[i]] <- 1
-        } else {
-          key_words[[i]] <- key_words[[i]] + 1
-        }
-      }
-    }
-    return(key_words)
+tfidf <- function(df) {
+  if (is.data.frame(df)) {
+    # bind_tf_idf() needs one row per (document, term) whose `n` is the term count
+    counts <- count(word_tokenizer(df), `제목`, `키워드`)
+    tfidf <- bind_tf_idf(counts, term = `키워드`, document = `제목`, n = n)
+    return(tfidf)
   } else {
-    stop("input type is to be have to list")
+    stop("input type is to be have to DataFrame")
   }
 }
 
+#' tfidf_vector
+#'
+#' Builds a tf-idf weighted document-term matrix with one row per article that has keywords.
+#' 
+#' @param df Original BigKinds data frame
+#'
+#' @examples
+#' data <- tfidf_vector(df)
+#' view(data)
+#' @import tm
+#' @import tibble
+#' @import dplyr
+#' @import tidytext
+#' @export
+tfidf_vector <- function(df) {
+  if (is.data.frame(df)) {
+    data <- word_tokenizer(df)
+    # re-join each article's tokens so the corpus has one document per article
+    docs <- vapply(split(data$키워드, data$rowid), paste, character(1), collapse = " ")
+    dtm <- DocumentTermMatrix(Corpus(VectorSource(docs)))
+    vec <- as.matrix(weightTfIdf(dtm))
+    return(vec)
+  } else {
+    stop("input type is to be have to DataFrame")
+  }
+}
 
-#' counter dict --> dataframe
+#' normalize_vector
+#'
+#' Min-max scales each row of a matrix on its own (row-wise scaling).
 #'
+#' @param vec Numeric matrix, e.g. a tf-idf matrix; each row is passed to `normalize()`
 #'
-#' @param infile Path to the input file
-#' @return A matrix of the infile
 #' @export
-counter_to_dataframe <- function(key_words) {
-  if (is.list(key_words)) {
-    word_df <- data.frame(matrix(unlist(key_words), ncol = 2, byrow = TRUE))
-    colnames(word_df) <- c("단어", "빈도")
-    word_df <- word_df[order(word_df$`빈도`, decreasing = TRUE), , drop = FALSE, ]
-    rownames(word_df) <- NULL
-    return(word_df)
+normalize_vector <- function(vec) {
+  if (is.matrix(vec)) {
+    scaled <- apply(vec, 1, normalize, simplify = FALSE)
+    return(do.call(rbind, scaled))
   } else {
-    stop("input type is to be have to dict")
+    stop("input type is to be have to matrix")
   }
 }
+normalize <- function(x, na.rm = TRUE) {  # min-max scale x to [0, 1]; a constant x maps to 0
+  (x - min(x, na.rm = na.rm)) / max(diff(range(x, na.rm = na.rm)), .Machine$double.xmin)  # floor avoids 0/0
+}
diff --git a/R/preprocessing.R b/R/preprocessing.R
deleted file mode 100644
index 68165c7..0000000
--- a/R/preprocessing.R
+++ /dev/null
@@ -1,67 +0,0 @@
-source(global.R)
-
-keyword_dataframe <- function(df) {
-  if (is.data.frame(df)) {
-    lis <- keyword_list(df)
-    keywords <- keyword_parser(lis)
-    counter <- word_counter(keywords)
-    df <- counter_to_dataframe(counter)
-    return(df)
-  } else {
-    stop("input type is to be have to DataFrame")
-  }
-}
-
-keyword_dataframe_no_duplicated <- function(df) {
-  if (is.data.frame(df)) {
-    lis <- keyword_list(df)
-    keywords <- keyword_parser(lis)
-    keywords_set <- duplication_remover(keywords)
-    counter <- word_counter(keywords_set)
-    df <- counter_to_dataframe(counter)
-    return(df)
-  } else {
-    stop("input type is to be have to DataFrame")
-  }
-}
-
-tfidf <- function(df, ...) {
-  if (is.data.frame(df)) {
-    if (length(...) > 0 && is.character(...)) {
-      df <- df[, ...]
-    }
-    lis <- keyword_list(df)
-    tfidfv <- DocumentTermMatrix(Corpus(VectorSource(lis)), control = list(weighting = weightTfIdf))
-    word_count <- data.frame(
-      단어 = colnames(tfidfv),
-      빈도 = colSums(as.matrix(tfidfv))
-    ) %>%
-      arrange(desc(빈도)) %>%
-      mutate(index = row_number()) %>%
-      select(-index)
-    return(word_count)
-  } else {
-    stop("input type is to be have to DataFrame")
-  }
-}
-
-tfidf_vector <- function(df) {
-  if (is.data.frame(df)) {
-    lis <- keyword_list(df)
-    dtm <- DocumentTermMatrix(Corpus(VectorSource(lis)))
-    tdm <- weightTfIdf(dtm)
-    vec <- as.matrix(tdm)
-    return(vec)
-  } else {
-    stop("input type is to be have to DataFrame")
-  }
-}
-
-normalize_vector <- function(vec) {
-  if (is.matrix(vec)) {
-    vec_nor <- t(normalize(t(vec)))
-    return(vec_nor)
-  } else {
-    stop("input type is to be have to matrix")
-  }
-}
diff --git a/R/press_counter.R b/R/press_counter.R
new file mode 100644
index 0000000..8cfa1d3
--- /dev/null
+++ b/R/press_counter.R
@@ -0,0 +1,20 @@
+#' press_counter
+#'
+#' Returns the number of articles per press.
+#' 
+#' @param df Original BigKinds data frame
+#'
+#' @examples
+#' press_counter(df)
+#' 
+#' @export
+press_counter <- function(df) {
+  if (is.data.frame(df)) {
+    freq <- table(df$언론사)
+    brod_df <- data.frame(언론사 = names(freq), 기사 = as.numeric(freq))
+    return(brod_df)
+  } else {
+    stop("input type is to be have to DataFrame")
+  }
+}
+
diff --git a/R/representation.R b/R/representation.R
deleted file mode 100644
index 095f19d..0000000
--- a/R/representation.R
+++ /dev/null
@@ -1,130 +0,0 @@
-library(tidyverse)
-library(arules)
-library(proxy)
-
-source(global.R)
-
-day_range <- function(df) {
-  if (is.data.frame(df)) {
-    print(paste("first day: ", min(df$일자)))
-    print(paste("last day: ", max(df$일자)))
-  } else {
-    stop("input type is to be have to DataFrame")
-  }
-}
-
-press_counter <- function(df) {
-  if (is.data.frame(df)) {
-    freq <- table(df$언론사)
-    brod_df <- data.frame(언론사 = names(freq), 기사 = as.numeric(freq))
-    return(brod_df)
-  } else {
-    stop("input type is to be have to DataFrame")
-  }
-}
-
-pca <- function(vec, Random_State = 123) {
-  if (is.matrix(vec)) {
-    pca_df <- prcomp(vec, center = TRUE)$x[, 1:2]
-    pca_df <- data.frame(`component 0` = pca_df[, 1], `component 1` = pca_df[, 2])
-    return(pca_df)
-  } else {
-    stop("input type is to be have to matrix")
-  }
-}
-
-nmf <- function(vec, Random_State = 123) {
-  if (is.matrix(vec)) {
-    nmf_df <- NMF::nmf(vec, 2, seed = Random_State)$W
-    nmf_df <- data.frame(`component 0` = nmf_df[, 1], `component 1` = nmf_df[, 2])
-    return(nmf_df)
-  } else {
-    stop("input type is to be have to matrix")
-  }
-}
-
-t_sne <- function(vec, learn_Rate = 100) {
-  if (is.matrix(vec)) {
-    tsne_df <- Rtsne::Rtsne(vec, dims = 2, perplexity = learn_Rate)$Y
-    tsne_df <- data.frame(`component 0` = tsne_df[, 1], `component 1` = tsne_df[, 2])
-    return(tsne_df)
-  } else {
-    stop("input type is to be have to matrix")
-  }
-}
-
-lsa <- function(vec) {
-  if (is.matrix(vec)) {
-    svd_df <- svd(vec)$u[, 1:2]
-    svd_df <- data.frame(`component 0` = svd_df[, 1], `component 1` = svd_df[, 2])
-    return(svd_df)
-  } else {
-    stop("input type is to be have to matrix")
-  }
-}
-
-kmeans <- function(vec, k, random_state = 123) {
-  if (is.matrix(vec)) {
-    set.seed(random_state)
-    kmeans_model <- kmeans(vec, centers = k, iter.max = 1000)
-    return(kmeans_model$cluster)
-  } else {
-    stop("input type is to be have to matrix")
-  }
-}
-
-dbscan <- function(vec, eps, min_samples, metric = "euclidean") {
-  if (is.matrix(vec)) {
-    dbscan_model <- dbscan::dbscan(vec, eps = eps, minPts = min_samples, method = metric)
-    return(dbscan_model$cluster)
-  } else {
-    stop("input type is to be have to matrix")
-  }
-}
-
-meanshift <- function(vec, qt = 0.25) {
-  if (is.matrix(vec)) {
-    best_bandwidth <- density(vec)$bw
-    print(paste(qt, "기준 최적 bandwidth 값:", round(best_bandwidth, 2)))
-
-    ms_model <- meanshift(vec, bandwidth = best_bandwidth)
-    print(paste("cluster 갯수:", length(unique(ms_model))))
-    return(ms_model)
-  } else {
-    stop("input type is to be have to matrix")
-  }
-}
-
-lda <- function(dataframe, k = 10, train = 100, fit = 10) {
-  if (is.data.frame(dataframe)) {
-    lis <- keyword_parser(keyword_list(dataframe))
-    model <- LDA(lis, k = k)
-
-    for (words in lis) {
-      model$add.documents(words)
-    }
-
-    for (i in seq(0, train, fit)) {
-      model$train(fit)
-    }
-
-    return(model)
-  } else {
-    stop("input type is to be have to DataFrame")
-  }
-}
-
-association <- function(dataframe, min_support = 0.5, use_colnames = TRUE, min_threshold = 0.1, metric = "confidence") {
-  if (is.data.frame(dataframe)) {
-    words <- keyword_parser(keyword_list(dataframe))
-    te_data <- as(words, "transactions")
-    result <- apriori(te_data, parameter = list(supp = min_support, minlen = 2, maxlen = Inf, target = "rules"))
-    result <- as.data.frame(inspect(result))
-    result <- result[, c("lhs", "rhs", "support", metric)]
-    colnames(result) <- c("lhs", "rhs", "support", metric)
-    result <- result[result[, metric] > min_threshold, ]
-    return(result)
-  } else {
-    stop("input type is to be have to DataFrame")
-  }
-}
diff --git a/R/topic_model.R b/R/topic_model.R
new file mode 100644
index 0000000..075bd7f
--- /dev/null
+++ b/R/topic_model.R
@@ -0,0 +1,27 @@
+#' lda
+#'
+#' Fits an LDA topic model on the per-title keyword counts.
+#' 
+#' @param dataframe Original BigKinds data frame
+#' @param k Number of topics
+#'
+#' @examples
+#' lda(df, k = 10)
+#'
+#' @import tidymodels
+#' @import dplyr
+#' @import tm
+#' @export
+lda <- function(dataframe, k = 10) {
+  if (is.data.frame(dataframe)) {
+    data <- word_tokenizer(dataframe)
+    data <- data |> 
+      count(제목, 키워드) |> 
+      cast_dtm(document = 제목, term = 키워드, value = n)
+    # NOTE(review): LDA() looks like topicmodels::LDA, which no visible @import provides - confirm
+    model <- LDA(data, k = k)
+    return(model)
+  } else {
+    stop("input type is to be have to DataFrame")
+  }
+}
diff --git a/R/visualization.R b/R/visualization.R
deleted file mode 100644
index 0f51c8b..0000000
--- a/R/visualization.R
+++ /dev/null
@@ -1,58 +0,0 @@
-library(wordcloud)
-library(ggplot2)
-
-keywords_wordcloud <- function(df, press) {
-  if (is.data.frame(df)) {
-    df_keywords <- df[df$언론사 == press, ]
-    keywords <- keyword_list(df_keywords)
-    news_key <- keyword_parser(keywords)
-    news_key <- duplication_remover(news_key)
-    key <- word_counter(news_key)
-    news_key <- counter_to_dataframe(key)
-    wc <- wordcloud::wordcloud(
-      words = news_key$단어,
-      freq = news_key$빈도,
-      scale = c(3, 0.5),
-      min.freq = 1,
-      max.words = 200,
-      random.order = FALSE,
-      rot.per = 0.35,
-      colors = brewer.pal(8, "Dark2")
-    )
-    print(wc)
-  } else {
-    stop("input type is to be have to DataFrame")
-  }
-}
-
-top_words <- function(df, press, top_n = 25) {
-  if (is.data.frame(df)) {
-    df_keywords <- df[grepl(press, df$언론사), ]
-    keywords <- keyword_list(df_keywords)
-    news_key <- keyword_parser(keywords)
-    news_key <- duplication_remover(news_key)
-    key <- word_counter(news_key)
-    news_key <- counter_to_dataframe(key)
-    data <- head(news_key[order(news_key$빈도, decreasing = TRUE), ], top_n)
-    ggplot(data, aes(reorder(단어, -빈도), 빈도)) +
-      geom_bar(stat = "identity", fill = "steelblue") +
-      labs(x = "단어", y = "빈도") +
-      theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
-      ggtitle("언론사 별 사용 단어 빈도 상위", top_n) +
-      coord_flip()
-  } else {
-    stop("input type is to be have to DataFrame")
-  }
-}
-
-scatterplot <- function(df, label) {
-  if (is.data.frame(df)) {
-    ggplot(df, aes(component.0, component.1, color = label)) +
-      geom_point() +
-      labs(x = "component 0", y = "component 1") +
-      ggtitle("Scatter plot for dimension reduction") +
-      theme(legend.position = "bottom")
-  } else {
-    stop("input type is to be have to DataFrame")
-  }
-}
diff --git a/R/wordclund.R b/R/wordclund.R
new file mode 100644
index 0000000..9204a40
--- /dev/null
+++ b/R/wordclund.R
@@ -0,0 +1,24 @@
+#' keywords_wordcloud
+#'
+#' Draws a word cloud of keyword frequencies, optionally restricted to one press.
+#'
+#' @param df Original BigKinds data frame
+#' @param press Press name to filter on; `NA` (the default) skips the filter
+#'
+#' @examples
+#' keywords_wordcloud(df, "조선일보")
+#' @import wordcloud2
+#' @import dplyr
+#' @export
+keywords_wordcloud <- function(df, press=NA) {
+  if (is.data.frame(df)) {
+    if (!is.na(press)){
+      df <- df |> filter(언론사 == press)
+    }
+    words <- keyword_dataframe(df)
+    wordcloud2(words)
+  } else {
+    stop("input type is to be have to DataFrame")
+  }
+}
+
diff --git a/man/DBSCAN.Rd b/man/DBSCAN.Rd
new file mode 100644
index 0000000..eb78cb7
--- /dev/null
+++ b/man/DBSCAN.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/clustering.R
+\name{DBSCAN}
+\alias{DBSCAN}
+\title{DBSCAN}
+\usage{
+DBSCAN(vec, eps, min_samples, metric = "euclidean")
+}
+\arguments{
+\item{eps}{epsilon 값(보폭)}
+
+\item{metric}{거리 계산 방법(default = euclidean)}
+
+\item{vec}{Numeric matrix to cluster, one observation per row}
+
+\item{min_samples}{최적 샘플 갯수}
+}
+\description{
+DBSCAN 알고리즘을 진행합니다.
+}
+\examples{
+DBSCAN(vec, eps = 0.5, min_samples = 3)
+
+}
diff --git a/man/Kmeans.Rd b/man/Kmeans.Rd
new file mode 100644
index 0000000..4b8627b
--- /dev/null
+++ b/man/Kmeans.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/clustering.R
+\name{Kmeans}
+\alias{Kmeans}
+\title{Kmeans}
+\usage{
+Kmeans(vec, k, random_state = 123)
+}
+\arguments{
+\item{k}{형성할 군집 갯수}
+
+\item{random_state}{seed 값}
+
+\item{vec}{Numeric matrix to cluster, one observation per row}
+}
+\description{
+kmeans clustering을 진행합니다.
+}
+\examples{
+Kmeans(vec, k = 3)
+
+}
diff --git a/man/MeanShift.Rd b/man/MeanShift.Rd
new file mode 100644
index 0000000..d81e157
--- /dev/null
+++ b/man/MeanShift.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/clustering.R
+\name{MeanShift}
+\alias{MeanShift}
+\title{MeanShift}
+\usage{
+MeanShift(vec, qt = 0.25)
+}
+\arguments{
+\item{qt}{quantile 값(최적 bandwidth 추정을 위함)}
+
+\item{vec}{Numeric matrix to cluster}
+}
+\description{
+mean shift clustering을 진행합니다.
+}
+\examples{
+MeanShift(vec, qt = 0.5)
+
+}
diff --git a/man/association.Rd b/man/association.Rd
new file mode 100644
index 0000000..5ca3240
--- /dev/null
+++ b/man/association.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/association.R
+\name{association}
+\alias{association}
+\title{association}
+\usage{
+association(df, min_support = 0.5, minlen = 2, maxlen = 10)
+}
+\arguments{
+\item{df}{BigKinds 원본 문서}
+
+\item{min_support}{최소 지지도}
+
+\item{minlen}{연관된 최소 갯수}
+
+\item{maxlen}{연관된 최대 갯수}
+}
+\description{
+기사에 등장한 단어 별로 연관분석을 진행합니다.
+연관분석 방법은 Apriori입니다.
+}
+\examples{
+association(df, min_support = 0.6, minlen = 3, maxlen = 10)
+}
diff --git a/man/counter_to_dataframe.Rd b/man/counter_to_dataframe.Rd
deleted file mode 100644
index 1ea98fa..0000000
--- a/man/counter_to_dataframe.Rd
+++ /dev/null
@@ -1,17 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/base.R
-\name{counter_to_dataframe}
-\alias{counter_to_dataframe}
-\title{counter dict --> dataframe}
-\usage{
-counter_to_dataframe(key_words)
-}
-\arguments{
-\item{infile}{Path to the input file}
-}
-\value{
-A matrix of the infile
-}
-\description{
-counter dict --> dataframe
-}
diff --git a/man/day_range.Rd b/man/day_range.Rd
new file mode 100644
index 0000000..0409eb7
--- /dev/null
+++ b/man/day_range.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/day_range.R
+\name{day_range}
+\alias{day_range}
+\title{day_range}
+\usage{
+day_range(df)
+}
+\arguments{
+\item{df}{BigKinds 원본 문서}
+}
+\description{
+Prints the first and last dates found in the data.
+}
+\examples{
+day_range(df)
+
+}
diff --git a/man/duplication_remover.Rd b/man/duplication_remover.Rd
deleted file mode 100644
index b58d02a..0000000
--- a/man/duplication_remover.Rd
+++ /dev/null
@@ -1,17 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/base.R
-\name{duplication_remover}
-\alias{duplication_remover}
-\title{중복 값 제거}
-\usage{
-duplication_remover(news_key)
-}
-\arguments{
-\item{infile}{Path to the input file}
-}
-\value{
-A matrix of the infile
-}
-\description{
-중복 값 제거
-}
diff --git a/man/header_remover.Rd b/man/header_remover.Rd
index 489af16..d3b7bed 100644
--- a/man/header_remover.Rd
+++ b/man/header_remover.Rd
@@ -1,17 +1,18 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/base.R
+% Please edit documentation in R/global.R
 \name{header_remover}
 \alias{header_remover}
-\title{[] 표시된 헤더 삭제}
+\title{header_remover}
 \usage{
 header_remover(df)
 }
 \arguments{
-\item{infile}{Path to the input file}
-}
-\value{
-A matrix of the infile
+\item{df}{BigKinds 원본 문서}
 }
 \description{
-[] 표시된 헤더 삭제
+상단에 존재하는 헤더를 제거합니다.
+}
+\examples{
+data <- header_remover(df)
+head(data)
 }
diff --git a/man/keyword_dataframe.Rd b/man/keyword_dataframe.Rd
new file mode 100644
index 0000000..a30cb01
--- /dev/null
+++ b/man/keyword_dataframe.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/global.R
+\name{keyword_dataframe}
+\alias{keyword_dataframe}
+\title{keyword_dataframe}
+\usage{
+keyword_dataframe(df)
+}
+\arguments{
+\item{df}{BigKinds 원본 문서}
+}
+\description{
+BigKinds 데이터 셋을 키워드 갯수 데이터프레임으로 변환합니다.
+}
+\examples{
+data <- keyword_dataframe(df)
+view(data)
+}
diff --git a/man/keyword_dataframe_no_duplicated.Rd b/man/keyword_dataframe_no_duplicated.Rd
new file mode 100644
index 0000000..43b5d13
--- /dev/null
+++ b/man/keyword_dataframe_no_duplicated.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/global.R
+\name{keyword_dataframe_no_duplicated}
+\alias{keyword_dataframe_no_duplicated}
+\title{keyword_dataframe_no_duplicated}
+\usage{
+keyword_dataframe_no_duplicated(df)
+}
+\arguments{
+\item{df}{BigKinds 원본 문서}
+}
+\description{
+BigKinds 데이터 셋을 키워드 갯수 데이터프레임(중복 미포함)으로 변환합니다.
+}
+\examples{
+data <- keyword_dataframe_no_duplicated(df)
+view(data)
+}
diff --git a/man/keyword_list.Rd b/man/keyword_list.Rd
deleted file mode 100644
index 241e534..0000000
--- a/man/keyword_list.Rd
+++ /dev/null
@@ -1,17 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/base.R
-\name{keyword_list}
-\alias{keyword_list}
-\title{키워드를 list로 변환}
-\usage{
-keyword_list(df)
-}
-\arguments{
-\item{infile}{Path to the input file}
-}
-\value{
-A matrix of the infile
-}
-\description{
-키워드를 list로 변환
-}
diff --git a/man/keyword_parser.Rd b/man/keyword_parser.Rd
deleted file mode 100644
index 1c75b7f..0000000
--- a/man/keyword_parser.Rd
+++ /dev/null
@@ -1,17 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/base.R
-\name{keyword_parser}
-\alias{keyword_parser}
-\title{[] 키워드 파싱}
-\usage{
-keyword_parser(text_list)
-}
-\arguments{
-\item{infile}{Path to the input file}
-}
-\value{
-A matrix of the infile
-}
-\description{
-[] 키워드 파싱
-}
diff --git a/man/keywords_wordcloud.Rd b/man/keywords_wordcloud.Rd
new file mode 100644
index 0000000..b7886cc
--- /dev/null
+++ b/man/keywords_wordcloud.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wordclund.R
+\name{keywords_wordcloud}
+\alias{keywords_wordcloud}
+\title{keywords_wordcloud}
+\usage{
+keywords_wordcloud(df, press = NA)
+}
+\arguments{
+\item{df}{BigKinds 원본 문서}
+
+\item{press}{확인할 언론사 이름}
+}
+\description{
+언론사 별로 가장 많이 등장한 단어 순위를 wordcloud로 시각화합니다.
+}
+\examples{
+keywords_wordcloud(df, "조선일보")
+}
diff --git a/man/lda.Rd b/man/lda.Rd
new file mode 100644
index 0000000..8c91d94
--- /dev/null
+++ b/man/lda.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/topic_model.R
+\name{lda}
+\alias{lda}
+\title{lda}
+\usage{
+lda(dataframe, k = 10)
+}
+\arguments{
+\item{k}{토픽 개수}
+
+\item{dataframe}{BigKinds 원본 문서}
+}
+\description{
+토픽 모델링을 시행합니다.
+}
+\examples{
+lda(df, k = 10)
+
+}
diff --git a/man/normalize_vector.Rd b/man/normalize_vector.Rd
new file mode 100644
index 0000000..5e50ab5
--- /dev/null
+++ b/man/normalize_vector.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/global.R
+\name{normalize_vector}
+\alias{normalize_vector}
+\title{normalize_vector}
+\usage{
+normalize_vector(vec)
+}
+\arguments{
+\item{vec}{tfidf vector}
+}
+\description{
+벡터를 정규화합니다.(row 기준 minmax scaling)
+}
diff --git a/man/press_counter.Rd b/man/press_counter.Rd
new file mode 100644
index 0000000..f236414
--- /dev/null
+++ b/man/press_counter.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/press_counter.R
+\name{press_counter}
+\alias{press_counter}
+\title{press_counter}
+\usage{
+press_counter(df)
+}
+\arguments{
+\item{df}{BigKinds 원본 문서}
+}
+\description{
+언론사 별 기사의 갯수를 반환합니다.
+}
+\examples{
+press_counter(df)
+
+}
diff --git a/man/tfidf.Rd b/man/tfidf.Rd
new file mode 100644
index 0000000..15d6895
--- /dev/null
+++ b/man/tfidf.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/global.R
+\name{tfidf}
+\alias{tfidf}
+\title{tfidf}
+\usage{
+tfidf(df)
+}
+\arguments{
+\item{df}{BigKinds 원본 문서}
+}
+\description{
+키워드의 tfidf score를 포함한 데이터 프레임을 반환합니다.
+}
+\examples{
+data <- tfidf(df)
+view(data)
+}
diff --git a/man/tfidf_vector.Rd b/man/tfidf_vector.Rd
new file mode 100644
index 0000000..a7408c7
--- /dev/null
+++ b/man/tfidf_vector.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/global.R
+\name{tfidf_vector}
+\alias{tfidf_vector}
+\title{tfidf_vector}
+\usage{
+tfidf_vector(df)
+}
+\arguments{
+\item{df}{BigKinds 원본 문서}
+}
+\description{
+tfidf vector로 변환합니다.
+}
+\examples{
+data <- tfidf_vector(df)
+view(data)
+}
diff --git a/man/top_words.Rd b/man/top_words.Rd
new file mode 100644
index 0000000..685fcd5
--- /dev/null
+++ b/man/top_words.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/barplot.R
+\name{top_words}
+\alias{top_words}
+\title{top_words}
+\usage{
+top_words(df, press = NA, top_n = 25)
+}
+\arguments{
+\item{df}{BigKinds 원본 문서}
+
+\item{press}{확인할 언론사 이름}
+
+\item{top_n}{시각화할 단어 갯수}
+}
+\description{
+언론사 별로 가장 많이 등장한 단어 순위를 시각화합니다.
+최대 몇개의 단어를 추출할지는 직접 정할 수 있습니다.
+default는 25개입니다.
+}
+\examples{
+top_words(df, "경향신문", top_n=30)
+}
diff --git a/man/word_counter.Rd b/man/word_counter.Rd
deleted file mode 100644
index 19361f6..0000000
--- a/man/word_counter.Rd
+++ /dev/null
@@ -1,17 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/base.R
-\name{word_counter}
-\alias{word_counter}
-\title{단어 갯수 카운트}
-\usage{
-word_counter(news_value)
-}
-\arguments{
-\item{infile}{Path to the input file}
-}
-\value{
-A matrix of the infile
-}
-\description{
-단어 갯수 카운트
-}
diff --git a/man/word_tokenizer.Rd b/man/word_tokenizer.Rd
new file mode 100644
index 0000000..43cbedd
--- /dev/null
+++ b/man/word_tokenizer.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/global.R
+\name{word_tokenizer}
+\alias{word_tokenizer}
+\title{word_tokenizer}
+\usage{
+word_tokenizer(df)
+}
+\arguments{
+\item{df}{BigKinds 원본 문서}
+}
+\description{
+파일로부터 문서 별 키워드로 나열된 데이터 프레임으로 변환합니다.
+}
+\examples{
+data <- word_tokenizer(df)
+view(data)
+}