From 572b64f6eb398b29d288736d7066b5bf59f783c2 Mon Sep 17 00:00:00 2001
From: manish saraswat <manisara0000@gmail.com>
Date: Wed, 22 May 2019 16:40:49 -0300
Subject: [PATCH 01/18] check for index in setkey

---
 R/setkey.R | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/R/setkey.R b/R/setkey.R
index c9a366b02..bc670995d 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -86,12 +86,18 @@ setkeyv <- function(x, cols, verbose=getOption("datatable.verbose"), physical=TR
     if (!typeof(.xi) %chin% c("integer","logical","character","double")) stop("Column '",i,"' is type '",typeof(.xi),"' which is not supported as a key column type, currently.")
   }
   if (!is.character(cols) || length(cols)<1L) stop("Internal error. 'cols' should be character at this point in setkey; please report.") # nocov
-  if (verbose) {
-    tt = suppressMessages(system.time(o <- forderv(x, cols, sort=TRUE, retGrp=FALSE)))  # system.time does a gc, so we don't want this always on, until refcnt is on by default in R
-    # suppress needed for tests 644 and 645 in verbose mode
-    cat("forder took", tt["user.self"]+tt["sys.self"], "sec\n")
+  
+  # forder only if index is not present
+  if(is.null(indices(x))){
+      if (verbose) {
+          tt = suppressMessages(system.time(o <- forderv(x, cols, sort=TRUE, retGrp=FALSE)))  # system.time does a gc, so we don't want this always on, until refcnt is on by default in R
+          # suppress needed for tests 644 and 645 in verbose mode
+          cat("forder took", tt["user.self"]+tt["sys.self"], "sec\n")
+      } else {
+          o <- forderv(x, cols, sort=TRUE, retGrp=FALSE)
+      }
   } else {
-    o <- forderv(x, cols, sort=TRUE, retGrp=FALSE)
+      o <- attr(attributes(x)$index, names(attributes(attributes(x)$index)))
   }
   if (!physical) {
     if (is.null(attr(x,"index",exact=TRUE))) setattr(x, "index", integer())

From c8ddc62f432a60e387dcc9a229a358955c71ac9b Mon Sep 17 00:00:00 2001
From: manish saraswat <manisara0000@gmail.com>
Date: Wed, 22 May 2019 17:15:20 -0300
Subject: [PATCH 02/18] explicitly naming the parameters

---
 R/setkey.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/R/setkey.R b/R/setkey.R
index bc670995d..1f9a85a50 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -86,7 +86,6 @@ setkeyv <- function(x, cols, verbose=getOption("datatable.verbose"), physical=TR
     if (!typeof(.xi) %chin% c("integer","logical","character","double")) stop("Column '",i,"' is type '",typeof(.xi),"' which is not supported as a key column type, currently.")
   }
   if (!is.character(cols) || length(cols)<1L) stop("Internal error. 'cols' should be character at this point in setkey; please report.") # nocov
-  
   # forder only if index is not present
   if(is.null(indices(x))){
       if (verbose) {
@@ -97,7 +96,7 @@ setkeyv <- function(x, cols, verbose=getOption("datatable.verbose"), physical=TR
           o <- forderv(x, cols, sort=TRUE, retGrp=FALSE)
       }
   } else {
-      o <- attr(attributes(x)$index, names(attributes(attributes(x)$index)))
+      o <- attr(attributes(x)$index, which=names(attributes(attributes(x)$index)), exact = TRUE)
   }
   if (!physical) {
     if (is.null(attr(x,"index",exact=TRUE))) setattr(x, "index", integer())

From bfefa7143737bbd7e5d767238f056f4c7243569c Mon Sep 17 00:00:00 2001
From: manish saraswat <manisara0000@gmail.com>
Date: Wed, 22 May 2019 17:55:49 -0300
Subject: [PATCH 03/18] using cols to check existing index

---
 R/setkey.R | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/R/setkey.R b/R/setkey.R
index 1f9a85a50..25929cb40 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -86,8 +86,16 @@ setkeyv <- function(x, cols, verbose=getOption("datatable.verbose"), physical=TR
     if (!typeof(.xi) %chin% c("integer","logical","character","double")) stop("Column '",i,"' is type '",typeof(.xi),"' which is not supported as a key column type, currently.")
   }
   if (!is.character(cols) || length(cols)<1L) stop("Internal error. 'cols' should be character at this point in setkey; please report.") # nocov
-  # forder only if index is not present
+  
+  # get existing index name if any
+  found_index <- NULL
   if(is.null(indices(x))){
+      found_index <- names(attributes(attributes(x)$index))
+      found_index <- gsub("^__","", found_index)
+  }
+  
+  # forder only if index is not present
+  if(!identical(found_index, cols)){
       if (verbose) {
           tt = suppressMessages(system.time(o <- forderv(x, cols, sort=TRUE, retGrp=FALSE)))  # system.time does a gc, so we don't want this always on, until refcnt is on by default in R
           # suppress needed for tests 644 and 645 in verbose mode
@@ -96,7 +104,7 @@ setkeyv <- function(x, cols, verbose=getOption("datatable.verbose"), physical=TR
           o <- forderv(x, cols, sort=TRUE, retGrp=FALSE)
       }
   } else {
-      o <- attr(attributes(x)$index, which=names(attributes(attributes(x)$index)), exact = TRUE)
+      o <- attr(attributes(x)$index, which=found_index, exact = TRUE)
   }
   if (!physical) {
     if (is.null(attr(x,"index",exact=TRUE))) setattr(x, "index", integer())

From 0c64f515797efb719a3a057db204b49331ea11ee Mon Sep 17 00:00:00 2001
From: manish saraswat <manisara0000@gmail.com>
Date: Thu, 23 May 2019 10:08:22 -0300
Subject: [PATCH 04/18] add test

---
 inst/tests/tests.Rraw | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index d56ee7f1b..e38fc27f4 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -6029,6 +6029,13 @@ thisDT <- copy(DT)[, c("aaa", "b") := 2]
 test(1419.58, indices(thisDT), c("a", "ab"))
 test(1419.59, allIndicesValid(thisDT), TRUE)
 
+## setkey on same col as index before
+DT <- data.table(a = c(1,1,1,1,1,2,2,2,2,2),
+                 aaa = c(1,1,2,2,2,1,1,2,2,2))
+setindex(DT, a)
+test(1419.59, allIndicesValid(DT), TRUE)
+setkey(DT, a)
+test(1419.60, indices(DT), c("a"))
 
 # setnames updates secondary key
 DT = data.table(a=1:5,b=10:6)

From d46358f0b48721d19a96f6c3fad2b413328b8e85 Mon Sep 17 00:00:00 2001
From: manish saraswat <manisara0000@gmail.com>
Date: Thu, 23 May 2019 11:05:45 -0300
Subject: [PATCH 05/18] adding test, fixing bug

---
 R/setkey.R            | 3 ++-
 inst/tests/tests.Rraw | 5 ++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/setkey.R b/R/setkey.R
index 25929cb40..4e15dcacf 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -89,7 +89,7 @@ setkeyv <- function(x, cols, verbose=getOption("datatable.verbose"), physical=TR
   
   # get existing index name if any
   found_index <- NULL
-  if(is.null(indices(x))){
+  if(!is.null(indices(x))){
       found_index <- names(attributes(attributes(x)$index))
       found_index <- gsub("^__","", found_index)
   }
@@ -104,6 +104,7 @@ setkeyv <- function(x, cols, verbose=getOption("datatable.verbose"), physical=TR
           o <- forderv(x, cols, sort=TRUE, retGrp=FALSE)
       }
   } else {
+      cat("using existing index for", found_index, "\n")
       o <- attr(attributes(x)$index, which=found_index, exact = TRUE)
   }
   if (!physical) {
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index e38fc27f4..b0e334ec0 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -6033,9 +6033,8 @@ test(1419.59, allIndicesValid(thisDT), TRUE)
 DT <- data.table(a = c(1,1,1,1,1,2,2,2,2,2),
                  aaa = c(1,1,2,2,2,1,1,2,2,2))
 setindex(DT, a)
-test(1419.59, allIndicesValid(DT), TRUE)
-setkey(DT, a)
-test(1419.60, indices(DT), c("a"))
+test(1419.60, allIndicesValid(DT), TRUE)
+test(1419.61, setkey(DT, a), output="using existing index for a")
 
 # setnames updates secondary key
 DT = data.table(a=1:5,b=10:6)

From 057938feb2aabe18b3aac0983273b6b54f83d09b Mon Sep 17 00:00:00 2001
From: manish saraswat <manisara0000@gmail.com>
Date: Fri, 24 May 2019 17:10:12 -0300
Subject: [PATCH 06/18] check for multi-indexes

---
 R/setkey.R            | 19 +++++++++++--------
 inst/tests/tests.Rraw |  7 +++++++
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/R/setkey.R b/R/setkey.R
index 4e15dcacf..0905bc818 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -88,14 +88,11 @@ setkeyv <- function(x, cols, verbose=getOption("datatable.verbose"), physical=TR
   if (!is.character(cols) || length(cols)<1L) stop("Internal error. 'cols' should be character at this point in setkey; please report.") # nocov
   
   # get existing index name if any
-  found_index <- NULL
-  if(!is.null(indices(x))){
-      found_index <- names(attributes(attributes(x)$index))
-      found_index <- gsub("^__","", found_index)
-  }
+  if(!is.null(indices(x))) found_index = names(attributes(attributes(x)$index))
+  new_possible_index = paste0("__", cols, collapse="")
   
   # forder only if index is not present
-  if(!identical(found_index, cols)){
+  if(!any(new_possible_index == found_index)){
       if (verbose) {
           tt = suppressMessages(system.time(o <- forderv(x, cols, sort=TRUE, retGrp=FALSE)))  # system.time does a gc, so we don't want this always on, until refcnt is on by default in R
           # suppress needed for tests 644 and 645 in verbose mode
@@ -104,8 +101,14 @@ setkeyv <- function(x, cols, verbose=getOption("datatable.verbose"), physical=TR
           o <- forderv(x, cols, sort=TRUE, retGrp=FALSE)
       }
   } else {
-      cat("using existing index for", found_index, "\n")
-      o <- attr(attributes(x)$index, which=found_index, exact = TRUE)
+      # find the matching index
+      ix =  found_index[which(found_index == new_possible_index)]
+      if (verbose){
+          cat("using existing index for", gsub("^__","", ix), "\n")
+          o <- attr(attributes(x)$index, which=ix, exact = TRUE)
+      } else {
+          o <- attr(attributes(x)$index, which=ix, exact = TRUE)
+      }
   }
   if (!physical) {
     if (is.null(attr(x,"index",exact=TRUE))) setattr(x, "index", integer())
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index b0e334ec0..5cd4808f0 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -6035,6 +6035,13 @@ DT <- data.table(a = c(1,1,1,1,1,2,2,2,2,2),
 setindex(DT, a)
 test(1419.60, allIndicesValid(DT), TRUE)
 test(1419.61, setkey(DT, a), output="using existing index for a")
+DT <- data.table(a = c(1,1,1,1,1,2,2,2,2,2),
+                 aaa = c(1,1,2,2,2,1,1,2,2,2),
+                 bbb = c(1,1,2,0,1,1,1,0,1,1))
+setindex(DT, a)
+setindex(DT, aaa)
+test(1419.62, allIndicesValid(DT), TRUE)
+test(1419.63, setkey(DT, aaa), output="using existing index for aaa")
 
 # setnames updates secondary key
 DT = data.table(a=1:5,b=10:6)

From 7503aeaf5a0af65d8e0d654aa26f957338ec182b Mon Sep 17 00:00:00 2001
From: manish saraswat <manisara0000@gmail.com>
Date: Fri, 24 May 2019 21:18:59 -0300
Subject: [PATCH 07/18] fix bug

---
 R/setkey.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/setkey.R b/R/setkey.R
index 0905bc818..0072856cf 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -88,7 +88,8 @@ setkeyv <- function(x, cols, verbose=getOption("datatable.verbose"), physical=TR
   if (!is.character(cols) || length(cols)<1L) stop("Internal error. 'cols' should be character at this point in setkey; please report.") # nocov
   
   # get existing index name if any
-  if(!is.null(indices(x))) found_index = names(attributes(attributes(x)$index))
+  found_index = NULL
+  if(!is.null(indices(x))) found_index <- names(attributes(attributes(x)$index))
   new_possible_index = paste0("__", cols, collapse="")
   
   # forder only if index is not present

From 3b0d3f5753f280eabc8ce03eaf2f9758300bb656 Mon Sep 17 00:00:00 2001
From: manish saraswat <manisara0000@gmail.com>
Date: Fri, 24 May 2019 21:40:40 -0300
Subject: [PATCH 08/18] use verbose param in test

---
 inst/tests/tests.Rraw | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 5cd4808f0..532f41789 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -6034,14 +6034,14 @@ DT <- data.table(a = c(1,1,1,1,1,2,2,2,2,2),
                  aaa = c(1,1,2,2,2,1,1,2,2,2))
 setindex(DT, a)
 test(1419.60, allIndicesValid(DT), TRUE)
-test(1419.61, setkey(DT, a), output="using existing index for a")
+test(1419.61, setkey(DT, a, verbose=TRUE), output="using existing index for a")
 DT <- data.table(a = c(1,1,1,1,1,2,2,2,2,2),
                  aaa = c(1,1,2,2,2,1,1,2,2,2),
                  bbb = c(1,1,2,0,1,1,1,0,1,1))
 setindex(DT, a)
 setindex(DT, aaa)
 test(1419.62, allIndicesValid(DT), TRUE)
-test(1419.63, setkey(DT, aaa), output="using existing index for aaa")
+test(1419.63, setkey(DT, aaa, verbose=TRUE), output="using existing index for aaa")
 
 # setnames updates secondary key
 DT = data.table(a=1:5,b=10:6)

From 153ae6c5cfd7b7a5ab4f59b66f659321bf5b9100 Mon Sep 17 00:00:00 2001
From: manish saraswat <manisara0000@gmail.com>
Date: Mon, 27 May 2019 14:17:59 -0300
Subject: [PATCH 09/18] add more test, description, news, fixes as suggested

---
 DESCRIPTION           |  3 ++-
 NEWS.md               |  2 ++
 R/setkey.R            | 12 +++++-------
 inst/tests/tests.Rraw | 13 +++++++++----
 4 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 4d5ecafd5..24ce231aa 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -39,7 +39,8 @@ Authors@R: c(
   person("Michael","Quinn",        role="ctb"),
   person("@javrucebo","",          role="ctb"),
   person("@marc-outins","",        role="ctb"),
-  person("Roy","Storey",           role="ctb"))
+  person("Roy","Storey",           role="ctb")),
+  person("Manish","Saraswat",      role="ctb"))
 Depends: R (>= 3.1.0)
 Imports: methods
 Suggests: bit64, curl, R.utils, knitr, xts, nanotime, zoo, yaml
diff --git a/NEWS.md b/NEWS.md
index a18b2cef3..31630923d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -118,6 +118,8 @@
 
 15. `rbind` and `rbindlist` of zero-row items now retain (again) the unused levels of any (zero-length) factor columns, [#3508](https://github.com/Rdatatable/data.table/issues/3508). This was a regression in v1.12.2 just for zero-row items. Unused factor levels were already retained for items having `nrow>=1`. Thanks to Gregory Demin for reporting.
 
+16. `setkey` now checks for existing index before doing `forder` [#3582](https://github.com/Rdatatable/data.table/pull/3582/). Executes `forder` only if the index does not exist. Thanks to @MichaelChirico for reporting and @saraswatmks for the PR.
+
 #### NOTES
 
 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below.
diff --git a/R/setkey.R b/R/setkey.R
index e507cfb39..0b18f33e7 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -88,12 +88,10 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU
   if (!is.character(cols) || length(cols)<1L) stop("Internal error. 'cols' should be character at this point in setkey; please report.") # nocov
   
   # get existing index name if any
-  found_index = NULL
-  if(!is.null(indices(x))) found_index <- names(attributes(attributes(x)$index))
-  new_possible_index = paste0("__", cols, collapse="")
+  index = paste0(cols, collapse="__")
   
   # forder only if index is not present
-  if(!any(new_possible_index == found_index)){
+  if (!any(index == indices(x))){
       if (verbose) {
           tt = suppressMessages(system.time(o <- forderv(x, cols, sort=TRUE, retGrp=FALSE)))  # system.time does a gc, so we don't want this always on, until refcnt is on by default in R
           # suppress needed for tests 644 and 645 in verbose mode
@@ -102,10 +100,10 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU
           o <- forderv(x, cols, sort=TRUE, retGrp=FALSE)
       }
   } else {
-      # find the matching index
-      ix =  found_index[which(found_index == new_possible_index)]
+      # find the name of matching index
+      ix = indices(x)[which(indices(x) == index)]
       if (verbose){
-          cat("using existing index for", gsub("^__","", ix), "\n")
+          cat("using existing index for", ix, "\n")
           o <- attr(attributes(x)$index, which=ix, exact = TRUE)
       } else {
           o <- attr(attributes(x)$index, which=ix, exact = TRUE)
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 1432037c6..f90ff0d21 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -6032,18 +6032,23 @@ test(1419.58, indices(thisDT), c("a", "ab"))
 test(1419.59, allIndicesValid(thisDT), TRUE)
 
 ## setkey on same col as index before
-DT <- data.table(a = c(1,1,1,1,1,2,2,2,2,2),
+DT <- data.table(a =c(4,1,3,9,2,1,8,7,6,5),
                  aaa = c(1,1,2,2,2,1,1,2,2,2))
 setindex(DT, a)
 test(1419.60, allIndicesValid(DT), TRUE)
-test(1419.61, setkey(DT, a, verbose=TRUE), output="using existing index for a")
+setkey(DT, a)
+test(1419.61, DT$a, c(1,1,2,3,4,5,6,7,8,9))
+setkey(DT, NULL)
+test(1419.62, setkey(DT, a, verbose=TRUE), output="using existing index for a")
+
+# check setkey incase of existing multiple indexes
 DT <- data.table(a = c(1,1,1,1,1,2,2,2,2,2),
                  aaa = c(1,1,2,2,2,1,1,2,2,2),
                  bbb = c(1,1,2,0,1,1,1,0,1,1))
 setindex(DT, a)
 setindex(DT, aaa)
-test(1419.62, allIndicesValid(DT), TRUE)
-test(1419.63, setkey(DT, aaa, verbose=TRUE), output="using existing index for aaa")
+test(1419.63, allIndicesValid(DT), TRUE)
+test(1419.64, setkey(DT, aaa, verbose=TRUE), output="using existing index for aaa")
 
 # setnames updates secondary key
 DT = data.table(a=1:5,b=10:6)

From 18097827f8c9e3aa8f94c3f7d6afead2e9308662 Mon Sep 17 00:00:00 2001
From: manish saraswat <manisara0000@gmail.com>
Date: Mon, 27 May 2019 14:58:51 -0300
Subject: [PATCH 10/18] update news

---
 NEWS.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 1da3af114..d16965b32 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -122,6 +122,8 @@
 
 16. `rbind` and `rbindlist` of an item containing an ordered factor with levels containing an `NA` (as opposed to an NA integer) could segfault, [#3601](https://github.com/Rdatatable/data.table/issues/3601). This was a a regression in v1.12.2. Thanks to Damian Betebenner for reporting.
 
+17. `setkey` now checks for existing index before doing `forder` [#3582](https://github.com/Rdatatable/data.table/pull/3582/). Executes `forder` only if the index does not exist. Thanks to @MichaelChirico for reporting and @saraswatmks for the PR.
+
 #### NOTES
 
 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below.

From a6aa862339afb5e8df93ebf1084579460dfa6b2f Mon Sep 17 00:00:00 2001
From: manish saraswat <manisara0000@gmail.com>
Date: Mon, 27 May 2019 16:16:04 -0300
Subject: [PATCH 11/18] fix issue

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 24ce231aa..f8addc857 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -39,7 +39,7 @@ Authors@R: c(
   person("Michael","Quinn",        role="ctb"),
   person("@javrucebo","",          role="ctb"),
   person("@marc-outins","",        role="ctb"),
-  person("Roy","Storey",           role="ctb")),
+  person("Roy","Storey",           role="ctb"),
   person("Manish","Saraswat",      role="ctb"))
 Depends: R (>= 3.1.0)
 Imports: methods

From 928812130627034ce9a3ae52ce33ef0deac26d5a Mon Sep 17 00:00:00 2001
From: manish saraswat <manisara0000@gmail.com>
Date: Mon, 27 May 2019 16:52:24 -0300
Subject: [PATCH 12/18] set index in test1419.62

---
 inst/tests/tests.Rraw | 1 +
 1 file changed, 1 insertion(+)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 68bb71cf7..115cbe482 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -6039,6 +6039,7 @@ test(1419.60, allIndicesValid(DT), TRUE)
 setkey(DT, a)
 test(1419.61, DT$a, c(1,1,2,3,4,5,6,7,8,9))
 setkey(DT, NULL)
+setindex(DT, a)
 test(1419.62, setkey(DT, a, verbose=TRUE), output="using existing index for a")
 
 # check setkey incase of existing multiple indexes

From b50025324dff08ba20c3bd6e81b1e50db05f7f3f Mon Sep 17 00:00:00 2001
From: manish saraswat <manisara0000@gmail.com>
Date: Mon, 27 May 2019 17:28:22 -0300
Subject: [PATCH 13/18] fix test error

---
 inst/tests/tests.Rraw | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 115cbe482..ff556a62d 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -6036,6 +6036,7 @@ DT <- data.table(a =c(4,1,3,9,2,1,8,7,6,5),
                  aaa = c(1,1,2,2,2,1,1,2,2,2))
 setindex(DT, a)
 test(1419.60, allIndicesValid(DT), TRUE)
+setindex(DT, NULL)
 setkey(DT, a)
 test(1419.61, DT$a, c(1,1,2,3,4,5,6,7,8,9))
 setkey(DT, NULL)
@@ -6043,9 +6044,14 @@ setindex(DT, a)
 test(1419.62, setkey(DT, a, verbose=TRUE), output="using existing index for a")
 
 # check setkey incase of existing multiple indexes
-DT <- data.table(a = c(1,1,1,1,1,2,2,2,2,2),
+DT <- data.table(a = c(3,3,4,4,5,6,1,1,7,2,2),
                  aaa = c(1,1,2,2,2,1,1,2,2,2),
                  bbb = c(1,1,2,0,1,1,1,0,1,1))
+setkey(DT, a)
+test(1419.63, DT$a, c(1,1,2,2,3,3,4,4,5,6,7))
+setkey(DT, NULL)
+test(1419.64, setkey(DT, a, verbose=TRUE), output="forder took")
+setkey(DT, NULL)
 setindex(DT, a)
 setindex(DT, aaa)
 test(1419.63, allIndicesValid(DT), TRUE)

From 230e5d1b486b2af2eaddf0e3cc34c8bfeff68449 Mon Sep 17 00:00:00 2001
From: manish saraswat <manisara0000@gmail.com>
Date: Mon, 27 May 2019 18:00:55 -0300
Subject: [PATCH 14/18] fix test sequence

---
 inst/tests/tests.Rraw | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index ff556a62d..51130becf 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -6054,8 +6054,8 @@ test(1419.64, setkey(DT, a, verbose=TRUE), output="forder took")
 setkey(DT, NULL)
 setindex(DT, a)
 setindex(DT, aaa)
-test(1419.63, allIndicesValid(DT), TRUE)
-test(1419.64, setkey(DT, aaa, verbose=TRUE), output="using existing index for aaa")
+test(1419.65, allIndicesValid(DT), TRUE)
+test(1419.66, setkey(DT, aaa, verbose=TRUE), output="using existing index for aaa")
 
 # setnames updates secondary key
 DT = data.table(a=1:5,b=10:6)

From 97d64a5f3c3195b8171e47ac78293ca4a79c7654 Mon Sep 17 00:00:00 2001
From: manish saraswat <manisara0000@gmail.com>
Date: Mon, 27 May 2019 18:13:44 -0300
Subject: [PATCH 15/18] fix testcase 1376.09, 1376.11, 1376.12

---
 inst/tests/tests.Rraw | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 51130becf..6b109cc2b 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -5668,13 +5668,13 @@ test(1376.06, indices(DT), c("b","a"))  # 2 secondary keys of single columns
 test(1376.07, DT[a==7L,verbose=TRUE], DT[7L], output="Optimized subsetting with index 'a'")
 setkey(DT,b)
 test(1376.08, indices(DT), NULL)
-test(1376.09, list(DT[a==2L], indices(DT)), list(DT[9L],"a"))  # create indices for next test
+test(1376.09, list(DT[a==2L], indices(DT)), list(DT[2L],"a"))  # create indices for next test
 setindex(DT,NULL)
 test(1376.10, list(key(DT), indices(DT)), list("b", NULL))
 options(datatable.auto.index = FALSE)
-test(1376.11, list(DT[a==2L], indices(DT)), list(DT[9L],NULL))
+test(1376.11, list(DT[a==2L], indices(DT)), list(DT[2L],NULL))
 options(datatable.auto.index = TRUE)
-test(1376.12, list(DT[a==2L], indices(DT)), list(DT[9L],"a"))
+test(1376.12, list(DT[a==2L], indices(DT)), list(DT[2L],"a"))
 
 # When i is FALSE and a column is being added by reference, for consistency with cases when i is not FALSE
 # we should still add the column. But we need to know what type it should be, so the user supplied RHS of :=

From 70b5760f2f8d959f0a2c55a4c6969a566e3f191d Mon Sep 17 00:00:00 2001
From: manish saraswat <manisara0000@gmail.com>
Date: Tue, 28 May 2019 15:40:36 -0300
Subject: [PATCH 16/18] remove which index

---
 R/setkey.R | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/R/setkey.R b/R/setkey.R
index 0b18f33e7..dda9a4972 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -88,10 +88,10 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU
   if (!is.character(cols) || length(cols)<1L) stop("Internal error. 'cols' should be character at this point in setkey; please report.") # nocov
   
   # get existing index name if any
-  index = paste0(cols, collapse="__")
+  newkey = paste0(cols, collapse="__")
   
   # forder only if index is not present
-  if (!any(index == indices(x))){
+  if (!any(indices(x) == newkey)){
       if (verbose) {
           tt = suppressMessages(system.time(o <- forderv(x, cols, sort=TRUE, retGrp=FALSE)))  # system.time does a gc, so we don't want this always on, until refcnt is on by default in R
           # suppress needed for tests 644 and 645 in verbose mode
@@ -101,12 +101,11 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU
       }
   } else {
       # find the name of matching index
-      ix = indices(x)[which(indices(x) == index)]
       if (verbose){
-          cat("using existing index for", ix, "\n")
-          o <- attr(attributes(x)$index, which=ix, exact = TRUE)
+          cat("using existing index for", newkey, "\n")
+          o <- attr(attributes(x)$index, which=newkey, exact = TRUE)
       } else {
-          o <- attr(attributes(x)$index, which=ix, exact = TRUE)
+          o <- attr(attributes(x)$index, which=newkey, exact = TRUE)
       }
   }
   if (!physical) {

From fcb0c5b699e4a1409bacd5ec90d344b92157b70e Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Tue, 28 May 2019 13:05:33 -0700
Subject: [PATCH 17/18] added checks on y too to new setkey tests

---
 inst/tests/tests.Rraw | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 6b109cc2b..107890523 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -5934,7 +5934,7 @@ test(1419.18, key(thisDT), "x1")
 ## testing secondary index retainment on assign (#2372)
 
 allIndicesValid <- function(DT){
-  ## checks that the order of all indices is correct
+  ## checks that the order of all indices are correct
   for(idx in seq_along(indices(DT))){
     index <- attr(attr(DT, "index"), paste0("__", indices(DT)[idx], collapse = ""))
     if(!length(index)) index <- seq_len(nrow(DT))
@@ -6031,7 +6031,7 @@ thisDT <- copy(DT)[, c("aaa", "b") := 2]
 test(1419.58, indices(thisDT), c("a", "ab"))
 test(1419.59, allIndicesValid(thisDT), TRUE)
 
-## setkey on same col as index before
+## setkey on same col as existing index, #2889
 DT <- data.table(a =c(4,1,3,9,2,1,8,7,6,5),
                  aaa = c(1,1,2,2,2,1,1,2,2,2))
 setindex(DT, a)
@@ -6041,21 +6041,23 @@ setkey(DT, a)
 test(1419.61, DT$a, c(1,1,2,3,4,5,6,7,8,9))
 setkey(DT, NULL)
 setindex(DT, a)
-test(1419.62, setkey(DT, a, verbose=TRUE), output="using existing index for a")
+test(1419.62, setkey(DT, a, verbose=TRUE), data.table(a=c(1,1:9), aaa=c(1,1,2,2,1,2,2,2,1,2), key="a"),
+              output="using existing index for a")  # checks also that the prior index a is dropped (because y is keyed with no index)
 
-# check setkey incase of existing multiple indexes
-DT <- data.table(a = c(3,3,4,4,5,6,1,1,7,2,2),
-                 aaa = c(1,1,2,2,2,1,1,2,2,2),
-                 bbb = c(1,1,2,0,1,1,1,0,1,1))
+# setkey picks correct index of multiple indexes (e.g. exact=TRUE is used in internals)
+DT = data.table(a =   c(3,3,4,4,5,6,1,1,7,2),
+                aaa = c(1,1,2,2,2,1,1,2,2,2),
+                bbb = c(1,1,2,0,1,1,1,0,1,1))
 setkey(DT, a)
-test(1419.63, DT$a, c(1,1,2,2,3,3,4,4,5,6,7))
+test(1419.63, DT$a, c(1,1,2,3,3,4,4,5,6,7))
 setkey(DT, NULL)
 test(1419.64, setkey(DT, a, verbose=TRUE), output="forder took")
 setkey(DT, NULL)
-setindex(DT, a)
-setindex(DT, aaa)
+setindex(DT, aaa, a)
+setindex(DT, aaa)   # this aaa not previous aaa_a should be used by setkey(DT,aaa); i.e. ensure no partial matching
 test(1419.65, allIndicesValid(DT), TRUE)
-test(1419.66, setkey(DT, aaa, verbose=TRUE), output="using existing index for aaa")
+test(1419.66, setkey(DT, aaa, verbose=TRUE), data.table(a=c(1,3,3,6,1,2,4,4,5,7), aaa=c(1,1,1,1,2,2,2,2,2,2), bbb=c(1,1,1,1,0,1,2,0,1,1), key="aaa"),
+              output="using existing index for aaa")  # checks that all indexes are dropped (aaa_a too)
 
 # setnames updates secondary key
 DT = data.table(a=1:5,b=10:6)

From bf59b635460d6dd59b65c65213c891858e532661 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Tue, 28 May 2019 14:16:18 -0700
Subject: [PATCH 18/18] uses getindex(), 2-space indentation, 2L back to 9L in
 test 1376

---
 R/setkey.R            | 36 ++++++++++++++----------------------
 inst/tests/tests.Rraw | 10 +++++-----
 2 files changed, 19 insertions(+), 27 deletions(-)

diff --git a/R/setkey.R b/R/setkey.R
index dda9a4972..eb0a4b89b 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -86,27 +86,19 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU
     if (!typeof(.xi) %chin% c("integer","logical","character","double")) stop("Column '",i,"' is type '",typeof(.xi),"' which is not supported as a key column type, currently.")
   }
   if (!is.character(cols) || length(cols)<1L) stop("Internal error. 'cols' should be character at this point in setkey; please report.") # nocov
-  
-  # get existing index name if any
+
   newkey = paste0(cols, collapse="__")
-  
-  # forder only if index is not present
-  if (!any(indices(x) == newkey)){
-      if (verbose) {
-          tt = suppressMessages(system.time(o <- forderv(x, cols, sort=TRUE, retGrp=FALSE)))  # system.time does a gc, so we don't want this always on, until refcnt is on by default in R
-          # suppress needed for tests 644 and 645 in verbose mode
-          cat("forder took", tt["user.self"]+tt["sys.self"], "sec\n")
-      } else {
-          o <- forderv(x, cols, sort=TRUE, retGrp=FALSE)
-      }
+  if (!any(indices(x) == newkey)) {
+    if (verbose) {
+      tt = suppressMessages(system.time(o <- forderv(x, cols, sort=TRUE, retGrp=FALSE)))  # system.time does a gc, so we don't want this always on, until refcnt is on by default in R
+      # suppress needed for tests 644 and 645 in verbose mode
+      cat("forder took", tt["user.self"]+tt["sys.self"], "sec\n")
+    } else {
+      o = forderv(x, cols, sort=TRUE, retGrp=FALSE)
+    }
   } else {
-      # find the name of matching index
-      if (verbose){
-          cat("using existing index for", newkey, "\n")
-          o <- attr(attributes(x)$index, which=newkey, exact = TRUE)
-      } else {
-          o <- attr(attributes(x)$index, which=newkey, exact = TRUE)
-      }
+    if (verbose) cat("setkey on columns ", brackify(cols), " using existing index '", newkey, "'\n", sep="")
+    o = getindex(x, newkey)
   }
   if (!physical) {
     if (is.null(attr(x,"index",exact=TRUE))) setattr(x, "index", integer())
@@ -175,7 +167,7 @@ setreordervec = function(x, order) .Call(Creorder, x, order)
 
 is.sorted = function(x, by=seq_along(x)) {
   if (is.list(x)) {
-    warning("Use 'if (length(o<-forderv(DT,by))) ...' for efficiency in one step, so you have o as well if not sorted.")
+    warning("Use 'if (length(o <- forderv(DT,by))) ...' for efficiency in one step, so you have o as well if not sorted.")
     # could pass through a flag for forderv to return early on first FALSE. But we don't need that internally
     # since internally we always then need ordering, an it's better in one step. Don't want inefficiency to creep in.
     # This is only here for user/debugging use to check/test valid keys; e.g. data.table:::is.sorted(DT,by)
@@ -250,7 +242,7 @@ forder = function(x, ..., na.last=TRUE, decreasing=FALSE)
       }
       if (is.name(v)) {
         ix = chmatch(as.character(v), xcols, nomatch=0L)
-        if (ix != 0L) ans <- point(ans, i, x, ix) # see 'point' in data.table.R and C-version pointWrapper in assign.c - avoid copies
+        if (ix != 0L) ans = point(ans, i, x, ix) # see 'point' in data.table.R and C-version pointWrapper in assign.c - avoid copies
         else {
           v = as.call(list(as.name("list"), v))
           ans = point(ans, i, eval(v, x, parent.frame()), 1L)
@@ -273,7 +265,7 @@ forder = function(x, ..., na.last=TRUE, decreasing=FALSE)
 fsort = function(x, decreasing=FALSE, na.last=FALSE, internal=FALSE, verbose=FALSE, ...)
 {
   containsNAs = FALSE
-  if (typeof(x)=="double" && !decreasing && !(containsNAs<-anyNA(x))) {
+  if (typeof(x)=="double" && !decreasing && !(containsNAs <- anyNA(x))) {
       if (internal) stop("Internal code should not be being called on type double")
       return(.Call(Cfsort, x, verbose))
   }
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 107890523..94870a592 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -5668,13 +5668,13 @@ test(1376.06, indices(DT), c("b","a"))  # 2 secondary keys of single columns
 test(1376.07, DT[a==7L,verbose=TRUE], DT[7L], output="Optimized subsetting with index 'a'")
 setkey(DT,b)
 test(1376.08, indices(DT), NULL)
-test(1376.09, list(DT[a==2L], indices(DT)), list(DT[2L],"a"))  # create indices for next test
+test(1376.09, list(DT[a==2L], indices(DT)), list(DT[9L],"a"))  # create indices for next test
 setindex(DT,NULL)
 test(1376.10, list(key(DT), indices(DT)), list("b", NULL))
 options(datatable.auto.index = FALSE)
-test(1376.11, list(DT[a==2L], indices(DT)), list(DT[2L],NULL))
+test(1376.11, list(DT[a==2L], indices(DT)), list(DT[9L],NULL))
 options(datatable.auto.index = TRUE)
-test(1376.12, list(DT[a==2L], indices(DT)), list(DT[2L],"a"))
+test(1376.12, list(DT[a==2L], indices(DT)), list(DT[9L],"a"))
 
 # When i is FALSE and a column is being added by reference, for consistency with cases when i is not FALSE
 # we should still add the column. But we need to know what type it should be, so the user supplied RHS of :=
@@ -6042,7 +6042,7 @@ test(1419.61, DT$a, c(1,1,2,3,4,5,6,7,8,9))
 setkey(DT, NULL)
 setindex(DT, a)
 test(1419.62, setkey(DT, a, verbose=TRUE), data.table(a=c(1,1:9), aaa=c(1,1,2,2,1,2,2,2,1,2), key="a"),
-              output="using existing index for a")  # checks also that the prior index a is dropped (because y is keyed with no index)
+              output="setkey on columns [a] using existing index 'a'")  # checks also that the prior index a is dropped (because y is keyed with no index)
 
 # setkey picks correct index of multiple indexes (e.g. exact=TRUE is used in internals)
 DT = data.table(a =   c(3,3,4,4,5,6,1,1,7,2),
@@ -6057,7 +6057,7 @@ setindex(DT, aaa, a)
 setindex(DT, aaa)   # this aaa not previous aaa_a should be used by setkey(DT,aaa); i.e. ensure no partial matching
 test(1419.65, allIndicesValid(DT), TRUE)
 test(1419.66, setkey(DT, aaa, verbose=TRUE), data.table(a=c(1,3,3,6,1,2,4,4,5,7), aaa=c(1,1,1,1,2,2,2,2,2,2), bbb=c(1,1,1,1,0,1,2,0,1,1), key="aaa"),
-              output="using existing index for aaa")  # checks that all indexes are dropped (aaa_a too)
+              output="setkey on columns [aaa] using existing index 'aaa'")  # checks that all indexes are dropped (aaa_a too)
 
 # setnames updates secondary key
 DT = data.table(a=1:5,b=10:6)