knnchapter.html

<!DOCTYPE html>
<html lang="en">

<head>

  <meta charset="UTF-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <title>Chapter 7 KNN - K Nearest Neighbour | Machine Learning with R</title>
  <meta name="description" content="This book is about using R for machine learning purposes.">
  <meta name="generator" content="bookdown  and GitBook 2.6.7">

  <meta property="og:title" content="Chapter 7 KNN - K Nearest Neighbour | Machine Learning with R" />
  <meta property="og:type" content="book" />
  
  
  <meta property="og:description" content="This book is about using R for machine learning purposes." />
  <meta name="github-repo" content="fderyckel/machinelearningwithr" />

  <meta name="twitter:card" content="summary" />
  <meta name="twitter:title" content="Chapter 7 KNN - K Nearest Neighbour | Machine Learning with R" />
  
  <meta name="twitter:description" content="This book is about using R for machine learning purposes." />
  

<meta name="author" content="François de Ryckel">


<meta name="date" content="2019-02-23">

  <meta name="viewport" content="width=device-width, initial-scale=1">
  <meta name="apple-mobile-web-app-capable" content="yes">
  <meta name="apple-mobile-web-app-status-bar-style" content="black">
  
  
<link rel="prev" href="gradient-descent.html">
<link rel="next" href="kmeans.html">
<script src="libs/jquery-2.2.3/jquery.min.js"></script>
<link href="libs/gitbook-2.6.7/css/style.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-table.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-bookdown.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-highlight.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-search.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-fontsettings.css" rel="stylesheet" />


<script src="libs/kePrint-0.0.1/kePrint.js"></script>


<style type="text/css">
div.sourceCode { overflow-x: auto; }
table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
  margin: 0; padding: 0; vertical-align: baseline; border: none; }
table.sourceCode { width: 100%; line-height: 100%; }
td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
td.sourceCode { padding-left: 5px; }
code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
code > span.dt { color: #902000; } /* DataType */
code > span.dv { color: #40a070; } /* DecVal */
code > span.bn { color: #40a070; } /* BaseN */
code > span.fl { color: #40a070; } /* Float */
code > span.ch { color: #4070a0; } /* Char */
code > span.st { color: #4070a0; } /* String */
code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
code > span.ot { color: #007020; } /* Other */
code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
code > span.fu { color: #06287e; } /* Function */
code > span.er { color: #ff0000; font-weight: bold; } /* Error */
code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
code > span.cn { color: #880000; } /* Constant */
code > span.sc { color: #4070a0; } /* SpecialChar */
code > span.vs { color: #4070a0; } /* VerbatimString */
code > span.ss { color: #bb6688; } /* SpecialString */
code > span.im { } /* Import */
code > span.va { color: #19177c; } /* Variable */
code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code > span.op { color: #666666; } /* Operator */
code > span.bu { } /* BuiltIn */
code > span.ex { } /* Extension */
code > span.pp { color: #bc7a00; } /* Preprocessor */
code > span.at { color: #7d9029; } /* Attribute */
code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
</style>

<link rel="stylesheet" href="style.css" type="text/css" />
</head>

<body>


  <div class="book without-animation with-summary font-size-2 font-family-1" data-basepath=".">

    <div class="book-summary">
      <nav role="navigation">

<ul class="summary">
<li><strong><a href="./">Machine Learning with R</a></strong></li>

<li class="divider"></li>
<li class="chapter" data-level="1" data-path="index.html"><a href="index.html"><i class="fa fa-check"></i><b>1</b> Prerequisites</a><ul>
<li class="chapter" data-level="1.1" data-path="index.html"><a href="index.html#pre-requisite-and-conventions"><i class="fa fa-check"></i><b>1.1</b> Pre-requisite and conventions</a></li>
<li class="chapter" data-level="1.2" data-path="index.html"><a href="index.html#organization"><i class="fa fa-check"></i><b>1.2</b> Organization</a></li>
<li class="chapter" data-level="1.3" data-path="index.html"><a href="index.html#packages"><i class="fa fa-check"></i><b>1.3</b> Packages</a></li>
</ul></li>
<li class="chapter" data-level="2" data-path="testinference.html"><a href="testinference.html"><i class="fa fa-check"></i><b>2</b> Tests and inferences</a><ul>
<li class="chapter" data-level="2.1" data-path="testinference.html"><a href="testinference.html#normality"><i class="fa fa-check"></i><b>2.1</b> Assumption of normality</a><ul>
<li class="chapter" data-level="2.1.1" data-path="testinference.html"><a href="testinference.html#visual-check-of-normality"><i class="fa fa-check"></i><b>2.1.1</b> Visual check of normality</a></li>
<li class="chapter" data-level="2.1.2" data-path="testinference.html"><a href="testinference.html#normality-tests"><i class="fa fa-check"></i><b>2.1.2</b> Normality tests</a></li>
</ul></li>
<li class="chapter" data-level="2.2" data-path="testinference.html"><a href="testinference.html#ttest"><i class="fa fa-check"></i><b>2.2</b> T-tests</a></li>
<li class="chapter" data-level="2.3" data-path="testinference.html"><a href="testinference.html#anova---analyse-of-variance."><i class="fa fa-check"></i><b>2.3</b> ANOVA - Analysis of variance.</a></li>
<li class="chapter" data-level="2.4" data-path="testinference.html"><a href="testinference.html#covariance"><i class="fa fa-check"></i><b>2.4</b> Covariance</a></li>
</ul></li>
<li class="chapter" data-level="3" data-path="mlr.html"><a href="mlr.html"><i class="fa fa-check"></i><b>3</b> Single &amp; Multiple Linear Regression</a><ul>
<li class="chapter" data-level="3.1" data-path="mlr.html"><a href="mlr.html#single-variable-regression"><i class="fa fa-check"></i><b>3.1</b> Single variable regression</a></li>
<li class="chapter" data-level="3.2" data-path="mlr.html"><a href="mlr.html#multi-variables-regression"><i class="fa fa-check"></i><b>3.2</b> Multi-variables regression</a><ul>
<li class="chapter" data-level="3.2.1" data-path="mlr.html"><a href="mlr.html#predicting-wine-price-again"><i class="fa fa-check"></i><b>3.2.1</b> Predicting wine price (again!)</a></li>
</ul></li>
<li class="chapter" data-level="3.3" data-path="mlr.html"><a href="mlr.html#model-diagnostic-and-evaluation"><i class="fa fa-check"></i><b>3.3</b> Model diagnostic and evaluation</a></li>
<li class="chapter" data-level="3.4" data-path="mlr.html"><a href="mlr.html#final-example---boston-dataset---with-backward-elimination"><i class="fa fa-check"></i><b>3.4</b> Final example - Boston dataset - with backward elimination</a><ul>
<li class="chapter" data-level="3.4.1" data-path="mlr.html"><a href="mlr.html#model-diagmostic"><i class="fa fa-check"></i><b>3.4.1</b> Model diagnostic</a></li>
</ul></li>
<li class="chapter" data-level="3.5" data-path="mlr.html"><a href="mlr.html#references"><i class="fa fa-check"></i><b>3.5</b> References</a></li>
</ul></li>
<li class="chapter" data-level="4" data-path="logistic.html"><a href="logistic.html"><i class="fa fa-check"></i><b>4</b> Logistic Regression</a><ul>
<li class="chapter" data-level="4.1" data-path="logistic.html"><a href="logistic.html#introduction"><i class="fa fa-check"></i><b>4.1</b> Introduction</a></li>
<li class="chapter" data-level="4.2" data-path="logistic.html"><a href="logistic.html#the-logistic-equation."><i class="fa fa-check"></i><b>4.2</b> The logistic equation.</a></li>
<li class="chapter" data-level="4.3" data-path="logistic.html"><a href="logistic.html#performance-of-logistic-regression-model"><i class="fa fa-check"></i><b>4.3</b> Performance of Logistic Regression Model</a></li>
<li class="chapter" data-level="4.4" data-path="logistic.html"><a href="logistic.html#setting-up"><i class="fa fa-check"></i><b>4.4</b> Setting up</a></li>
<li class="chapter" data-level="4.5" data-path="logistic.html"><a href="logistic.html#example-1---graduate-admission"><i class="fa fa-check"></i><b>4.5</b> Example 1 - Graduate Admission</a></li>
<li class="chapter" data-level="4.6" data-path="logistic.html"><a href="logistic.html#example-2---diabetes"><i class="fa fa-check"></i><b>4.6</b> Example 2 - Diabetes</a><ul>
<li class="chapter" data-level="4.6.1" data-path="logistic.html"><a href="logistic.html#accounting-for-missing-values"><i class="fa fa-check"></i><b>4.6.1</b> Accounting for missing values</a></li>
<li class="chapter" data-level="4.6.2" data-path="logistic.html"><a href="logistic.html#imputting-missing-values"><i class="fa fa-check"></i><b>4.6.2</b> Imputing Missing Values</a></li>
<li class="chapter" data-level="4.6.3" data-path="logistic.html"><a href="logistic.html#roc-and-auc"><i class="fa fa-check"></i><b>4.6.3</b> ROC and AUC</a></li>
</ul></li>
<li class="chapter" data-level="4.7" data-path="logistic.html"><a href="logistic.html#references-1"><i class="fa fa-check"></i><b>4.7</b> References</a></li>
</ul></li>
<li class="chapter" data-level="5" data-path="softmax-and-multinomial-regressions.html"><a href="softmax-and-multinomial-regressions.html"><i class="fa fa-check"></i><b>5</b> Softmax and multinomial regressions</a><ul>
<li class="chapter" data-level="5.1" data-path="softmax-and-multinomial-regressions.html"><a href="softmax-and-multinomial-regressions.html#multinomial-logistic-regression"><i class="fa fa-check"></i><b>5.1</b> Multinomial Logistic Regression</a></li>
<li class="chapter" data-level="5.2" data-path="softmax-and-multinomial-regressions.html"><a href="softmax-and-multinomial-regressions.html#references-2"><i class="fa fa-check"></i><b>5.2</b> References</a></li>
</ul></li>
<li class="chapter" data-level="6" data-path="gradient-descent.html"><a href="gradient-descent.html"><i class="fa fa-check"></i><b>6</b> Gradient Descent</a><ul>
<li class="chapter" data-level="6.1" data-path="gradient-descent.html"><a href="gradient-descent.html#example-on-functions"><i class="fa fa-check"></i><b>6.1</b> Example on functions</a></li>
<li class="chapter" data-level="6.2" data-path="gradient-descent.html"><a href="gradient-descent.html#example-on-regressions"><i class="fa fa-check"></i><b>6.2</b> Example on regressions</a></li>
</ul></li>
<li class="chapter" data-level="7" data-path="knnchapter.html"><a href="knnchapter.html"><i class="fa fa-check"></i><b>7</b> KNN - K Nearest Neighbour</a><ul>
<li class="chapter" data-level="7.1" data-path="knnchapter.html"><a href="knnchapter.html#example-1.-prostate-cancer-dataset"><i class="fa fa-check"></i><b>7.1</b> Example 1. Prostate Cancer dataset</a></li>
<li class="chapter" data-level="7.2" data-path="knnchapter.html"><a href="knnchapter.html#example-2.-wine-dataset"><i class="fa fa-check"></i><b>7.2</b> Example 2. Wine dataset</a><ul>
<li class="chapter" data-level="7.2.1" data-path="knnchapter.html"><a href="knnchapter.html#understand-the-data"><i class="fa fa-check"></i><b>7.2.1</b> Understand the data</a></li>
</ul></li>
<li class="chapter" data-level="7.3" data-path="knnchapter.html"><a href="knnchapter.html#references-3"><i class="fa fa-check"></i><b>7.3</b> References</a></li>
</ul></li>
<li class="chapter" data-level="8" data-path="kmeans.html"><a href="kmeans.html"><i class="fa fa-check"></i><b>8</b> Kmeans clustering</a><ul>
<li class="chapter" data-level="8.1" data-path="kmeans.html"><a href="kmeans.html#multinomial-logistic-regression-1"><i class="fa fa-check"></i><b>8.1</b> Multinomial Logistic Regression</a></li>
<li class="chapter" data-level="8.2" data-path="kmeans.html"><a href="kmeans.html#references-4"><i class="fa fa-check"></i><b>8.2</b> References</a></li>
</ul></li>
<li class="chapter" data-level="9" data-path="hierclust.html"><a href="hierclust.html"><i class="fa fa-check"></i><b>9</b> Hierarchical Clustering</a><ul>
<li class="chapter" data-level="9.1" data-path="hierclust.html"><a href="hierclust.html#example-on-the-pokemon-dataset"><i class="fa fa-check"></i><b>9.1</b> Example on the Pokemon dataset</a></li>
<li class="chapter" data-level="9.2" data-path="hierclust.html"><a href="hierclust.html#example-on-regressions-1"><i class="fa fa-check"></i><b>9.2</b> Example on regressions</a></li>
<li class="chapter" data-level="9.3" data-path="hierclust.html"><a href="hierclust.html#references-5"><i class="fa fa-check"></i><b>9.3</b> References</a></li>
</ul></li>
<li class="chapter" data-level="10" data-path="pca.html"><a href="pca.html"><i class="fa fa-check"></i><b>10</b> Principal Component Analysis</a><ul>
<li class="chapter" data-level="10.1" data-path="pca.html"><a href="pca.html#pca-on-an-easy-example."><i class="fa fa-check"></i><b>10.1</b> PCA on an easy example.</a></li>
<li class="chapter" data-level="10.2" data-path="pca.html"><a href="pca.html#references."><i class="fa fa-check"></i><b>10.2</b> References.</a></li>
</ul></li>
<li class="chapter" data-level="11" data-path="trees-and-classification.html"><a href="trees-and-classification.html"><i class="fa fa-check"></i><b>11</b> Trees and Classification</a><ul>
<li class="chapter" data-level="11.1" data-path="trees-and-classification.html"><a href="trees-and-classification.html#introduction-1"><i class="fa fa-check"></i><b>11.1</b> Introduction</a></li>
<li class="chapter" data-level="11.2" data-path="trees-and-classification.html"><a href="trees-and-classification.html#first-example."><i class="fa fa-check"></i><b>11.2</b> First example.</a></li>
<li class="chapter" data-level="11.3" data-path="trees-and-classification.html"><a href="trees-and-classification.html#second-example."><i class="fa fa-check"></i><b>11.3</b> Second Example.</a></li>
<li class="chapter" data-level="11.4" data-path="trees-and-classification.html"><a href="trees-and-classification.html#how-does-a-tree-decide-where-to-split"><i class="fa fa-check"></i><b>11.4</b> How does a tree decide where to split?</a></li>
<li class="chapter" data-level="11.5" data-path="trees-and-classification.html"><a href="trees-and-classification.html#third-example."><i class="fa fa-check"></i><b>11.5</b> Third example.</a></li>
<li class="chapter" data-level="11.6" data-path="trees-and-classification.html"><a href="trees-and-classification.html#references-6"><i class="fa fa-check"></i><b>11.6</b> References</a></li>
</ul></li>
<li class="chapter" data-level="12" data-path="random-forest.html"><a href="random-forest.html"><i class="fa fa-check"></i><b>12</b> Random Forest</a><ul>
<li class="chapter" data-level="12.1" data-path="random-forest.html"><a href="random-forest.html#how-does-it-work"><i class="fa fa-check"></i><b>12.1</b> How does it work?</a></li>
<li class="chapter" data-level="12.2" data-path="random-forest.html"><a href="random-forest.html#references-7"><i class="fa fa-check"></i><b>12.2</b> References</a></li>
</ul></li>
<li class="chapter" data-level="13" data-path="svm.html"><a href="svm.html"><i class="fa fa-check"></i><b>13</b> Support Vector Machine</a><ul>
<li class="chapter" data-level="13.1" data-path="svm.html"><a href="svm.html#support-vecotr-regression"><i class="fa fa-check"></i><b>13.1</b> Support Vector Regression</a><ul>
<li class="chapter" data-level="13.1.1" data-path="svm.html"><a href="svm.html#create-data"><i class="fa fa-check"></i><b>13.1.1</b> Create data</a></li>
<li class="chapter" data-level="13.1.2" data-path="svm.html"><a href="svm.html#tuning-a-svm-model"><i class="fa fa-check"></i><b>13.1.2</b> Tuning a SVM model</a></li>
<li class="chapter" data-level="13.1.3" data-path="svm.html"><a href="svm.html#discussion-on-parameters"><i class="fa fa-check"></i><b>13.1.3</b> Discussion on parameters</a></li>
</ul></li>
<li class="chapter" data-level="13.2" data-path="svm.html"><a href="svm.html#references-8"><i class="fa fa-check"></i><b>13.2</b> References</a></li>
</ul></li>
<li class="chapter" data-level="14" data-path="model-evaluation.html"><a href="model-evaluation.html"><i class="fa fa-check"></i><b>14</b> Model Evaluation</a><ul>
<li class="chapter" data-level="14.1" data-path="model-evaluation.html"><a href="model-evaluation.html#biais-variance-tradeoff"><i class="fa fa-check"></i><b>14.1</b> Bias variance tradeoff</a></li>
<li class="chapter" data-level="14.2" data-path="model-evaluation.html"><a href="model-evaluation.html#bagging"><i class="fa fa-check"></i><b>14.2</b> Bagging</a></li>
<li class="chapter" data-level="14.3" data-path="model-evaluation.html"><a href="model-evaluation.html#crossvalidation"><i class="fa fa-check"></i><b>14.3</b> Cross Validation</a></li>
</ul></li>
<li class="chapter" data-level="15" data-path="case-study-text-classification-spam-and-ham-.html"><a href="case-study-text-classification-spam-and-ham-.html"><i class="fa fa-check"></i><b>15</b> Case Study - Text classification: Spam and Ham.</a></li>
<li class="chapter" data-level="16" data-path="mushroom.html"><a href="mushroom.html"><i class="fa fa-check"></i><b>16</b> Case Study - Mushrooms Classification</a><ul>
<li class="chapter" data-level="16.1" data-path="mushroom.html"><a href="mushroom.html#import-the-data"><i class="fa fa-check"></i><b>16.1</b> Import the data</a></li>
<li class="chapter" data-level="16.2" data-path="mushroom.html"><a href="mushroom.html#tidy-the-data"><i class="fa fa-check"></i><b>16.2</b> Tidy the data</a></li>
<li class="chapter" data-level="16.3" data-path="mushroom.html"><a href="mushroom.html#understand-the-data-1"><i class="fa fa-check"></i><b>16.3</b> Understand the data</a><ul>
<li class="chapter" data-level="16.3.1" data-path="mushroom.html"><a href="mushroom.html#transform-the-data"><i class="fa fa-check"></i><b>16.3.1</b> Transform the data</a></li>
<li class="chapter" data-level="16.3.2" data-path="mushroom.html"><a href="mushroom.html#visualize-the-data"><i class="fa fa-check"></i><b>16.3.2</b> Visualize the data</a></li>
<li class="chapter" data-level="16.3.3" data-path="mushroom.html"><a href="mushroom.html#modeling"><i class="fa fa-check"></i><b>16.3.3</b> Modeling</a></li>
</ul></li>
<li class="chapter" data-level="16.4" data-path="mushroom.html"><a href="mushroom.html#communication"><i class="fa fa-check"></i><b>16.4</b> Communication</a></li>
</ul></li>
<li class="chapter" data-level="17" data-path="case-study-the-adults-dataset-.html"><a href="case-study-the-adults-dataset-.html"><i class="fa fa-check"></i><b>17</b> Case study - The adults dataset.</a><ul>
<li class="chapter" data-level="17.1" data-path="case-study-the-adults-dataset-.html"><a href="case-study-the-adults-dataset-.html#introduction-2"><i class="fa fa-check"></i><b>17.1</b> Introduction</a></li>
<li class="chapter" data-level="17.2" data-path="case-study-the-adults-dataset-.html"><a href="case-study-the-adults-dataset-.html#import-the-data-1"><i class="fa fa-check"></i><b>17.2</b> Import the data</a></li>
<li class="chapter" data-level="17.3" data-path="case-study-the-adults-dataset-.html"><a href="case-study-the-adults-dataset-.html#tidy-the-data-1"><i class="fa fa-check"></i><b>17.3</b> Tidy the data</a></li>
</ul></li>
<li class="chapter" data-level="18" data-path="breastcancer.html"><a href="breastcancer.html"><i class="fa fa-check"></i><b>18</b> Case Study - Wisconsin Breast Cancer</a><ul>
<li class="chapter" data-level="18.1" data-path="breastcancer.html"><a href="breastcancer.html#import-the-data-2"><i class="fa fa-check"></i><b>18.1</b> Import the data</a></li>
<li class="chapter" data-level="18.2" data-path="breastcancer.html"><a href="breastcancer.html#tidy-the-data-2"><i class="fa fa-check"></i><b>18.2</b> Tidy the data</a></li>
<li class="chapter" data-level="18.3" data-path="breastcancer.html"><a href="breastcancer.html#understand-the-data-2"><i class="fa fa-check"></i><b>18.3</b> Understand the data</a><ul>
<li class="chapter" data-level="18.3.1" data-path="breastcancer.html"><a href="breastcancer.html#transform-the-data-1"><i class="fa fa-check"></i><b>18.3.1</b> Transform the data</a></li>
<li class="chapter" data-level="18.3.2" data-path="breastcancer.html"><a href="breastcancer.html#pre-process-the-data"><i class="fa fa-check"></i><b>18.3.2</b> Pre-process the data</a></li>
<li class="chapter" data-level="18.3.3" data-path="breastcancer.html"><a href="breastcancer.html#model-the-data-1"><i class="fa fa-check"></i><b>18.3.3</b> Model the data</a></li>
</ul></li>
<li class="chapter" data-level="18.4" data-path="breastcancer.html"><a href="breastcancer.html#references-9"><i class="fa fa-check"></i><b>18.4</b> References</a></li>
</ul></li>
<li class="chapter" data-level="19" data-path="final-words.html"><a href="final-words.html"><i class="fa fa-check"></i><b>19</b> Final Words</a></li>
<li class="chapter" data-level="" data-path="references-10.html"><a href="references-10.html"><i class="fa fa-check"></i>References</a></li>
</ul>

      </nav>
    </div>

    <div class="book-body">
      <div class="body-inner">
        <div class="book-header" role="navigation">
          <h1>
            <i class="fa fa-circle-o-notch fa-spin"></i><a href="./">Machine Learning with R</a>
          </h1>
        </div>

        <div class="page-wrapper" tabindex="-1" role="main">
          <div class="page-inner">

            <section class="normal" id="section-">
<div id="knnchapter" class="section level1">
<h1><span class="header-section-number">Chapter 7</span> KNN - K Nearest Neighbour</h1>
<p>Clustering is an unsupervised learning technique. It is the task of grouping together a set of objects in a way that objects in the same cluster are more similar to each other than to objects in other clusters. Similarity is an amount that reflects the strength of relationship between two data objects. Clustering is mainly used for exploratory data mining.</p>
<p>The KNN algorithm is a robust and versatile classifier that is often used as a benchmark for more complex classifiers such as Artificial Neural Networks (ANN) and Support Vector Machines (SVM). Despite its simplicity, KNN can outperform more powerful classifiers and is used in a variety of applications.</p>
<p>The KNN classifier is also a non-parametric and instance-based learning algorithm.</p>
<p><strong>Non-parametric</strong> means it makes no explicit assumptions about the functional form of <em>h</em> (the function that maps an input to its predicted label), avoiding the dangers of mismodeling the underlying distribution of the data. For example, suppose our data is highly non-Gaussian but the learning model we choose assumes a Gaussian form. In that case, our algorithm would make extremely poor predictions.</p>
<p><strong>Instance-based</strong> learning means that our algorithm doesn’t explicitly learn a model (lazy learner). Instead, it chooses to memorize the training instances which are subsequently used as “knowledge” for the prediction phase. Concretely, this means that only when a query to our database is made (i.e. when we ask it to predict a label given an input), will the algorithm use the training instances to spit out an answer.</p>
<p>It is worth noting that the minimal training phase of KNN comes both at a memory cost, since we must store a potentially huge data set, as well as a computational cost during test time since classifying a given observation requires a run down of the whole data set. Practically speaking, this is undesirable since we usually want fast responses.</p>
<p>The principle behind the KNN (K-Nearest Neighbour) classifier is to find a predefined number K of training samples that are closest in distance to a new point, and to predict a label for this new point using these samples.</p>
<p>When K is small, we are restraining the region of a given prediction and forcing our classifier to be “more blind” to the overall distribution. A small value for K provides the most flexible fit, which will have low bias but high variance. Graphically, our decision boundary will be more jagged.</p>
<div class="figure">
<img src="otherpics/knn01.png" alt="KNN with k = 1" />
<p class="caption">KNN with k = 1</p>
</div>
<p>On the other hand, a higher K averages more voters in each prediction and hence is more resilient to outliers. Larger values of K will have smoother decision boundaries which means lower variance but increased bias.</p>
<div class="figure">
<img src="otherpics/knn20.png" alt="KNN with k = 20" />
<p class="caption">KNN with k = 20</p>
</div>
<p>What we are observing here is that increasing k will decrease variance and increase bias, while decreasing k will increase variance and decrease bias. Take a look at how variable the predictions are for different data sets at low k. As k increases this variability is reduced. But if we increase k too much, then we no longer follow the true boundary line and we observe high bias. This is the nature of the Bias-Variance Tradeoff.</p>
<p>Clustering can be broadly divided into two subgroups:</p>
<ul>
<li>Hard clustering: in hard clustering, each data object or point either belongs to a cluster completely or not. For example in the Uber dataset, each location belongs to either one borough or the other.</li>
<li>Soft clustering: in soft clustering, a data point can belong to more than one cluster with some probability or likelihood value. For example, you could identify some locations as the border points belonging to two or more boroughs.</li>
</ul>
<div id="example-1.-prostate-cancer-dataset" class="section level2">
<h2><span class="header-section-number">7.1</span> Example 1. Prostate Cancer dataset</h2>

<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">df &lt;-<span class="st"> </span><span class="kw">read_csv</span>(<span class="st">&quot;dataset/prostate_cancer.csv&quot;</span>)
<span class="kw">glimpse</span>(df)</code></pre></div>
<pre><code>## Observations: 100
## Variables: 10
## $ id                &lt;dbl&gt; 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,…
## $ diagnosis_result  &lt;chr&gt; &quot;M&quot;, &quot;B&quot;, &quot;M&quot;, &quot;M&quot;, &quot;M&quot;, &quot;B&quot;, &quot;M&quot;, &quot;M&quot;, &quot;M&quot;, &quot;…
## $ radius            &lt;dbl&gt; 23, 9, 21, 14, 9, 25, 16, 15, 19, 25, 24, 17, …
## $ texture           &lt;dbl&gt; 12, 13, 27, 16, 19, 25, 26, 18, 24, 11, 21, 15…
## $ perimeter         &lt;dbl&gt; 151, 133, 130, 78, 135, 83, 120, 90, 88, 84, 1…
## $ area              &lt;dbl&gt; 954, 1326, 1203, 386, 1297, 477, 1040, 578, 52…
## $ smoothness        &lt;dbl&gt; 0.143, 0.143, 0.125, 0.070, 0.141, 0.128, 0.09…
## $ compactness       &lt;dbl&gt; 0.278, 0.079, 0.160, 0.284, 0.133, 0.170, 0.10…
## $ symmetry          &lt;dbl&gt; 0.242, 0.181, 0.207, 0.260, 0.181, 0.209, 0.17…
## $ fractal_dimension &lt;dbl&gt; 0.079, 0.057, 0.060, 0.097, 0.059, 0.076, 0.05…</code></pre>
<p>Change the diagnosis result into a factor, then remove the <code>id</code> variable as it does not bring any useful information.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">df<span class="op">$</span>diagnosis_result &lt;-<span class="st"> </span><span class="kw">factor</span>(df<span class="op">$</span>diagnosis_result, <span class="dt">levels =</span> <span class="kw">c</span>(<span class="st">&quot;B&quot;</span>, <span class="st">&quot;M&quot;</span>), 
                               <span class="dt">labels =</span> <span class="kw">c</span>(<span class="st">&quot;Benign&quot;</span>, <span class="st">&quot;Malignant&quot;</span>))
df2 &lt;-<span class="st"> </span>df <span class="op">%&gt;%</span><span class="st"> </span><span class="kw">select</span>(<span class="op">-</span>id)

<span class="co"># Checking how balance is the dependend variable </span>
<span class="kw">prop.table</span>(<span class="kw">table</span>(df2<span class="op">$</span>diagnosis_result))</code></pre></div>
<pre><code>## 
##    Benign Malignant 
##      0.38      0.62</code></pre>
<p>It is quite typical of such medical datasets to be unbalanced. We’ll have to deal with it.</p>
<p>Like with PCA, KNN is quite sensitive to the scale of the variables. So it is important to first standardize the variables. This time we’ll do this using the <code>preProcess</code> function of the <code>caret</code> package.<br />
 </p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(caret)
param_preproc_df2 &lt;-<span class="st"> </span><span class="kw">preProcess</span>(df2[,<span class="dv">2</span><span class="op">:</span><span class="dv">9</span>], <span class="dt">method =</span> <span class="kw">c</span>(<span class="st">&quot;scale&quot;</span>, <span class="st">&quot;center&quot;</span>))
df3_stdize &lt;-<span class="st"> </span><span class="kw">predict</span>(param_preproc_df2, df2[, <span class="dv">2</span><span class="op">:</span><span class="dv">9</span>])

<span class="kw">summary</span>(df3_stdize)</code></pre></div>
<pre><code>##      radius            texture          perimeter            area        
##  Min.   :-1.60891   Min.   :-1.3923   Min.   :-1.8914   Min.   :-1.5667  
##  1st Qu.:-0.99404   1st Qu.:-0.8146   1st Qu.:-0.6031   1st Qu.:-0.7073  
##  Median : 0.03074   Median :-0.1406   Median :-0.1174   Median :-0.1842  
##  Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.85057   3rd Qu.: 0.7741   3rd Qu.: 0.7379   3rd Qu.: 0.6697  
##  Max.   : 1.67039   Max.   : 1.6888   Max.   : 3.1770   Max.   : 3.6756  
##    smoothness        compactness         symmetry       fractal_dimension
##  Min.   :-2.23539   Min.   :-1.4507   Min.   :-1.8896   Min.   :-1.4342  
##  1st Qu.:-0.63039   1st Qu.:-0.7556   1st Qu.:-0.6877   1st Qu.:-0.6981  
##  Median :-0.04986   Median :-0.1341   Median :-0.1030   Median :-0.2073  
##  Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.63312   3rd Qu.: 0.4956   3rd Qu.: 0.5142   3rd Qu.: 0.5288  
##  Max.   : 2.75035   Max.   : 3.5703   Max.   : 3.6001   Max.   : 3.9639</code></pre>
<p>We can now see that all means are centered around 0. Now we reconstruct our df with the response variable and we split the df into a training and testing set.<br />
</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">df3_stdize &lt;-<span class="st"> </span><span class="kw">bind_cols</span>(<span class="dt">diagnosis =</span> df2<span class="op">$</span>diagnosis_result, df3_stdize)

param_split&lt;-<span class="st"> </span><span class="kw">createDataPartition</span>(df3_stdize<span class="op">$</span>diagnosis, <span class="dt">times =</span> <span class="dv">1</span>, <span class="dt">p =</span> <span class="fl">0.8</span>, 
                                      <span class="dt">list =</span> <span class="ot">FALSE</span>)
train_df3 &lt;-<span class="st"> </span>df3_stdize[param_split, ]
test_df3 &lt;-<span class="st"> </span>df3_stdize[<span class="op">-</span>param_split, ]

<span class="co">#We can check that we still have the same kind of split</span>
<span class="kw">prop.table</span>(<span class="kw">table</span>(train_df3<span class="op">$</span>diagnosis))</code></pre></div>
<pre><code>## 
##    Benign Malignant 
##  0.382716  0.617284</code></pre>
<p>Nice to see that the proportion of <em>Malignant</em> vs <em>Benign</em> has been conserved.<br />
  We use KNN with cross-validation (discussed in more detail in section <a href="model-evaluation.html#crossvalidation">14.3</a>) to train our model.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">trnctrl_df3 &lt;-<span class="st"> </span><span class="kw">trainControl</span>(<span class="dt">method =</span> <span class="st">&quot;cv&quot;</span>, <span class="dt">number =</span> <span class="dv">10</span>)
model_knn_df3 &lt;-<span class="st"> </span><span class="kw">train</span>(diagnosis <span class="op">~</span>., <span class="dt">data =</span> train_df3, <span class="dt">method =</span> <span class="st">&quot;knn&quot;</span>, 
                       <span class="dt">trControl =</span> trnctrl_df3, 
                       <span class="dt">tuneLength =</span> <span class="dv">10</span>)

model_knn_df3</code></pre></div>
<pre><code>## k-Nearest Neighbors 
## 
## 81 samples
##  8 predictor
##  2 classes: &#39;Benign&#39;, &#39;Malignant&#39; 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 73, 73, 73, 73, 73, 73, ... 
## Resampling results across tuning parameters:
## 
##   k   Accuracy   Kappa    
##    5  0.8319444  0.6205678
##    7  0.8555556  0.6662155
##    9  0.8555556  0.6662155
##   11  0.8555556  0.6700251
##   13  0.8555556  0.6662155
##   15  0.8555556  0.6624060
##   17  0.8555556  0.6761056
##   19  0.8305556  0.6260615
##   21  0.8305556  0.6195489
##   23  0.8430556  0.6580104
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 17.</code></pre>

<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">plot</span>(model_knn_df3)</code></pre></div>
<p><img src="machinelearningwithR_files/figure-html/knn05-1.png" alt="Cross-validated accuracy of the KNN model plotted against the number of neighbours k" width="672" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">predict_knn_df3 &lt;-<span class="st"> </span><span class="kw">predict</span>(model_knn_df3, test_df3)
<span class="kw">confusionMatrix</span>(predict_knn_df3, test_df3<span class="op">$</span>diagnosis, <span class="dt">positive =</span> <span class="st">&quot;Malignant&quot;</span>)</code></pre></div>
<pre><code>## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  Benign Malignant
##   Benign         4         0
##   Malignant      3        12
##                                           
##                Accuracy : 0.8421          
##                  95% CI : (0.6042, 0.9662)
##     No Information Rate : 0.6316          
##     P-Value [Acc &gt; NIR] : 0.04241         
##                                           
##                   Kappa : 0.6275          
##  Mcnemar&#39;s Test P-Value : 0.24821         
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.5714          
##          Pos Pred Value : 0.8000          
##          Neg Pred Value : 1.0000          
##              Prevalence : 0.6316          
##          Detection Rate : 0.6316          
##    Detection Prevalence : 0.7895          
##       Balanced Accuracy : 0.7857          
##                                           
##        &#39;Positive&#39; Class : Malignant       
## </code></pre>
</div>
<div id="example-2.-wine-dataset" class="section level2">
<h2><span class="header-section-number">7.2</span> Example 2. Wine dataset</h2>
<p> We load the dataset and do some quick cleaning</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">df &lt;-<span class="st"> </span><span class="kw">read_csv</span>(<span class="st">&quot;dataset/Wine_UCI.csv&quot;</span>, <span class="dt">col_names =</span> <span class="ot">FALSE</span>)
<span class="kw">colnames</span>(df) &lt;-<span class="st"> </span><span class="kw">c</span>(<span class="st">&quot;Origin&quot;</span>, <span class="st">&quot;Alcohol&quot;</span>, <span class="st">&quot;Malic_acid&quot;</span>, <span class="st">&quot;Ash&quot;</span>, <span class="st">&quot;Alkalinity_of_ash&quot;</span>, 
                  <span class="st">&quot;Magnesium&quot;</span>, <span class="st">&quot;Total_phenols&quot;</span>, <span class="st">&quot;Flavanoids&quot;</span>, <span class="st">&quot;Nonflavonoids_phenols&quot;</span>, 
                  <span class="st">&quot;Proanthocyanins&quot;</span>, <span class="st">&quot;Color_intensity&quot;</span>, <span class="st">&quot;Hue&quot;</span>, <span class="st">&quot;OD280_OD315_diluted_wines&quot;</span>, 
                  <span class="st">&quot;Proline&quot;</span>)

<span class="kw">glimpse</span>(df)</code></pre></div>
<pre><code>## Observations: 178
## Variables: 14
## $ Origin                    &lt;dbl&gt; 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ Alcohol                   &lt;dbl&gt; 14.23, 13.20, 13.16, 14.37, 13.24, 14.…
## $ Malic_acid                &lt;dbl&gt; 1.71, 1.78, 2.36, 1.95, 2.59, 1.76, 1.…
## $ Ash                       &lt;dbl&gt; 2.43, 2.14, 2.67, 2.50, 2.87, 2.45, 2.…
## $ Alkalinity_of_ash         &lt;dbl&gt; 15.6, 11.2, 18.6, 16.8, 21.0, 15.2, 14…
## $ Magnesium                 &lt;dbl&gt; 127, 100, 101, 113, 118, 112, 96, 121,…
## $ Total_phenols             &lt;dbl&gt; 2.80, 2.65, 2.80, 3.85, 2.80, 3.27, 2.…
## $ Flavanoids                &lt;dbl&gt; 3.06, 2.76, 3.24, 3.49, 2.69, 3.39, 2.…
## $ Nonflavonoids_phenols     &lt;dbl&gt; 0.28, 0.26, 0.30, 0.24, 0.39, 0.34, 0.…
## $ Proanthocyanins           &lt;dbl&gt; 2.29, 1.28, 2.81, 2.18, 1.82, 1.97, 1.…
## $ Color_intensity           &lt;dbl&gt; 5.64, 4.38, 5.68, 7.80, 4.32, 6.75, 5.…
## $ Hue                       &lt;dbl&gt; 1.04, 1.05, 1.03, 0.86, 1.04, 1.05, 1.…
## $ OD280_OD315_diluted_wines &lt;dbl&gt; 3.92, 3.40, 3.17, 3.45, 2.93, 2.85, 3.…
## $ Proline                   &lt;dbl&gt; 1065, 1050, 1185, 1480, 735, 1450, 129…</code></pre>
<p>The origin is our dependent variable. Let’s make it a factor.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">df<span class="op">$</span>Origin &lt;-<span class="st"> </span><span class="kw">as.factor</span>(df<span class="op">$</span>Origin)

<span class="co">#Let&#39;s check our explained variable distribution of origin</span>
<span class="kw">round</span>(<span class="kw">prop.table</span>(<span class="kw">table</span>(df<span class="op">$</span>Origin)), <span class="dv">2</span>)</code></pre></div>
<pre><code>## 
##    1    2    3 
## 0.33 0.40 0.27</code></pre>
<p>That’s nice: our explained variable is almost equally distributed across the 3 origins.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Let&#39;s also check if we have any NA values</span>
<span class="kw">summary</span>(df)</code></pre></div>
<pre><code>##  Origin    Alcohol        Malic_acid         Ash        Alkalinity_of_ash
##  1:59   Min.   :11.03   Min.   :0.740   Min.   :1.360   Min.   :10.60    
##  2:71   1st Qu.:12.36   1st Qu.:1.603   1st Qu.:2.210   1st Qu.:17.20    
##  3:48   Median :13.05   Median :1.865   Median :2.360   Median :19.50    
##         Mean   :13.00   Mean   :2.336   Mean   :2.367   Mean   :19.49    
##         3rd Qu.:13.68   3rd Qu.:3.083   3rd Qu.:2.558   3rd Qu.:21.50    
##         Max.   :14.83   Max.   :5.800   Max.   :3.230   Max.   :30.00    
##    Magnesium      Total_phenols     Flavanoids    Nonflavonoids_phenols
##  Min.   : 70.00   Min.   :0.980   Min.   :0.340   Min.   :0.1300       
##  1st Qu.: 88.00   1st Qu.:1.742   1st Qu.:1.205   1st Qu.:0.2700       
##  Median : 98.00   Median :2.355   Median :2.135   Median :0.3400       
##  Mean   : 99.74   Mean   :2.295   Mean   :2.029   Mean   :0.3619       
##  3rd Qu.:107.00   3rd Qu.:2.800   3rd Qu.:2.875   3rd Qu.:0.4375       
##  Max.   :162.00   Max.   :3.880   Max.   :5.080   Max.   :0.6600       
##  Proanthocyanins Color_intensity       Hue        
##  Min.   :0.410   Min.   : 1.280   Min.   :0.4800  
##  1st Qu.:1.250   1st Qu.: 3.220   1st Qu.:0.7825  
##  Median :1.555   Median : 4.690   Median :0.9650  
##  Mean   :1.591   Mean   : 5.058   Mean   :0.9574  
##  3rd Qu.:1.950   3rd Qu.: 6.200   3rd Qu.:1.1200  
##  Max.   :3.580   Max.   :13.000   Max.   :1.7100  
##  OD280_OD315_diluted_wines    Proline      
##  Min.   :1.270             Min.   : 278.0  
##  1st Qu.:1.938             1st Qu.: 500.5  
##  Median :2.780             Median : 673.5  
##  Mean   :2.612             Mean   : 746.9  
##  3rd Qu.:3.170             3rd Qu.: 985.0  
##  Max.   :4.000             Max.   :1680.0</code></pre>
<p>Here we notice that the range of values in our variables is quite wide. It means our data will need to be standardized. We also note that we have no “NA” values. That’s quite a nice surprise!</p>
<div id="understand-the-data" class="section level3">
<h3><span class="header-section-number">7.2.1</span> Understand the data</h3>
<p>We first split our data into a training and a testing set.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">df2 &lt;-<span class="st"> </span>df
param_split_df2 &lt;-<span class="st"> </span><span class="kw">createDataPartition</span>(df2<span class="op">$</span>Origin, <span class="dt">p =</span> <span class="fl">0.75</span>, <span class="dt">list =</span> <span class="ot">FALSE</span>)

train_df2 &lt;-<span class="st"> </span>df2[param_split_df2, ]
test_df2 &lt;-<span class="st"> </span>df2[<span class="op">-</span>param_split_df2, ]</code></pre></div>
<p>The great thing with caret is that we can standardize our data during the training phase.</p>
<div id="model-the-data" class="section level4">
<h4><span class="header-section-number">7.2.1.1</span> Model the data</h4>
<p>Let’s keep using <code>caret</code> for our training.<br />
</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">trnctrl_df2 &lt;-<span class="st"> </span><span class="kw">trainControl</span>(<span class="dt">method =</span> <span class="st">&quot;repeatedcv&quot;</span>, <span class="dt">number =</span> <span class="dv">10</span>, <span class="dt">repeats =</span> <span class="dv">3</span>)
model_knn_df2 &lt;-<span class="st"> </span><span class="kw">train</span>(Origin <span class="op">~</span>., <span class="dt">data =</span> train_df2, <span class="dt">method =</span> <span class="st">&quot;knn&quot;</span>, 
                       <span class="dt">trControl =</span> trnctrl_df2, 
                       <span class="dt">preProcess =</span> <span class="kw">c</span>(<span class="st">&quot;center&quot;</span>, <span class="st">&quot;scale&quot;</span>),  
                       <span class="dt">tuneLength =</span> <span class="dv">10</span>)</code></pre></div>

<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">model_knn_df2</code></pre></div>
<pre><code>## k-Nearest Neighbors 
## 
## 135 samples
##  13 predictor
##   3 classes: &#39;1&#39;, &#39;2&#39;, &#39;3&#39; 
## 
## Pre-processing: centered (13), scaled (13) 
## Resampling: Cross-Validated (10 fold, repeated 3 times) 
## Summary of sample sizes: 123, 123, 121, 121, 121, 122, ... 
## Resampling results across tuning parameters:
## 
##   k   Accuracy   Kappa    
##    5  0.9548291  0.9319335
##    7  0.9724664  0.9584705
##    9  0.9748779  0.9618555
##   11  0.9800061  0.9697717
##   13  0.9778083  0.9663714
##   15  0.9801893  0.9699160
##   17  0.9801893  0.9699160
##   19  0.9851343  0.9775459
##   21  0.9825702  0.9736065
##   23  0.9825702  0.9736065
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 19.</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">plot</span>(model_knn_df2)</code></pre></div>
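<p><img src="machinelearningwithR_files/figure-html/plot01_knn-1.png" alt="Repeated cross-validation accuracy of the KNN model on the wine data plotted against the number of neighbours k" width="672" /></p>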
<p>Let’s use our model to make our prediction</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">prediction_knn_df2 &lt;-<span class="st"> </span><span class="kw">predict</span>(model_knn_df2, <span class="dt">newdata =</span> test_df2)

<span class="kw">confusionMatrix</span>(prediction_knn_df2, <span class="dt">reference =</span> test_df2<span class="op">$</span>Origin)</code></pre></div>
<pre><code>## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  1  2  3
##          1 14  1  0
##          2  0 15  0
##          3  0  1 12
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9535          
##                  95% CI : (0.8419, 0.9943)
##     No Information Rate : 0.3953          
##     P-Value [Acc &gt; NIR] : 1.02e-14        
##                                           
##                   Kappa : 0.93            
##  Mcnemar&#39;s Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            1.0000   0.8824   1.0000
## Specificity            0.9655   1.0000   0.9677
## Pos Pred Value         0.9333   1.0000   0.9231
## Neg Pred Value         1.0000   0.9286   1.0000
## Prevalence             0.3256   0.3953   0.2791
## Detection Rate         0.3256   0.3488   0.2791
## Detection Prevalence   0.3488   0.3488   0.3023
## Balanced Accuracy      0.9828   0.9412   0.9839</code></pre>
</div>
</div>
</div>
<div id="references-3" class="section level2">
<h2><span class="header-section-number">7.3</span> References</h2>
<ul>
<li>KNN R, K-Nearest neighbor implementation in R using caret package. <a href="http://dataaspirant.com/2017/01/09/knn-implementation-r-using-caret-package/">Here</a></li>
<li>A complete guide to KNN. <a href="https://kevinzakka.github.io/2016/07/13/k-nearest-neighbor/">Here</a></li>
<li>K-Means Clustering in R Tutorial. <a href="https://www.datacamp.com/community/tutorials/k-means-clustering-r?utm_campaign=News&amp;utm_medium=Community&amp;utm_source=DataCamp.com">Here</a></li>
</ul>

</div>
</div>
            </section>

          </div>
        </div>
      </div>
<a href="gradient-descent.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a>
<a href="kmeans.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a>
    </div>
  </div>
<script src="libs/gitbook-2.6.7/js/app.min.js"></script>
<script src="libs/gitbook-2.6.7/js/lunr.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-search.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-sharing.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-fontsettings.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-bookdown.js"></script>
<script src="libs/gitbook-2.6.7/js/jquery.highlight.js"></script>
<script>
// Start gitbook for this page once the "gitbook" module has been resolved.
gitbook.require(["gitbook"], function (book) {
  // Page-level plugin settings handed to gitbook.start() unchanged.
  var settings = {
    // Share buttons: each flag toggles one button; "all" lists the services
    // passed to the sharing plugin.
    sharing: {
      github: false,
      facebook: true,
      twitter: true,
      google: false,
      linkedin: false,
      weibo: false,
      instapaper: false,
      vk: false,
      all: ["facebook", "google", "twitter", "linkedin", "weibo", "instapaper"]
    },
    // Initial reader appearance.
    fontsettings: {
      theme: "white",
      family: "sans",
      size: 2
    },
    // Link to the R Markdown source of this chapter on GitHub.
    edit: {
      link: "https://github.com/fderyckel/machinelearningwithr/edit/master/07-KNN.Rmd",
      text: "Suggest edit to this page"
    },
    // No history link and no download options are configured.
    history: {
      link: null,
      text: null
    },
    download: null,
    toc: {
      collapse: "section"
    }
  };

  book.start(settings);
});
</script>

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  // Inject the MathJax loader into <head> at runtime.
  (function () {
    var script = document.createElement("script");
    // Template placeholder: an empty value (or the literal "true") selects the
    // default CDN build below.
    var src = "";
    if (src === "" || src === "true") src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-MML-AM_CHTML";
    // Always fetch over HTTPS. The previous code stripped the scheme to build a
    // protocol-relative URL, which made http-served pages load this third-party
    // script over plain HTTP.
    src = src.replace(/^http:/, "https:");
    script.src = src;
    document.head.appendChild(script);
  })();
</script>
</body>

</html>