diff --git a/.Rbuildignore b/.Rbuildignore
index d7ff1c3..8bc8488 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -8,3 +8,5 @@ remove_whitespace.sh
^vignettes/*.html
scripts/*
create_md.R
+SO_Answers/*.R
+^codecov\.yml$
diff --git a/.github/workflows/R-CMD-check.yml b/.github/workflows/R-CMD-check.yml
index e712a21..7d77610 100644
--- a/.github/workflows/R-CMD-check.yml
+++ b/.github/workflows/R-CMD-check.yml
@@ -24,7 +24,7 @@ jobs:
fail-fast: false
matrix:
config:
- - {os: macos-latest, r: 'release'}
+ - {os: macos-latest, r: 'devel'}
- {os: windows-latest, r: 'release'}
# use 4.1 to check with rtools40's older compiler
@@ -35,7 +35,6 @@ jobs:
- {os: ubuntu-latest, r: 'oldrel-1'}
- {os: ubuntu-latest, r: 'oldrel-2'}
- {os: ubuntu-latest, r: 'oldrel-3'}
- - {os: ubuntu-latest, r: 'oldrel-4'}
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
diff --git a/DESCRIPTION b/DESCRIPTION
index 2b9ffe4..7f72ae9 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
Package: RcppAlgos
-Version: 2.8.3
+Version: 2.8.4
Title: High Performance Tools for Combinatorics and Computational Mathematics
Description: Provides optimized functions and flexible combinatorial iterators
implemented in C++ for solving problems in combinatorics and
@@ -28,6 +28,7 @@ BugReports: https://github.com/jwood000/RcppAlgos/issues
LinkingTo: cpp11
Imports: gmp, methods
Suggests: testthat, partitions, microbenchmark, knitr, RcppBigIntAlgos, rmarkdown
+Config/Needs/website: pkgdown
License: GPL (>=2)
SystemRequirements: gmp (>= 4.2.3)
VignetteBuilder: knitr
diff --git a/NEWS.md b/NEWS.md
index fca15da..0310a2a 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,14 @@
+# RcppAlgos 2.8.
+
+## New Features:
+
+* Can now pass integer convertible results to the sample functions for the `n` parameter.
+
+## Bug Fixes:
+
+* Fixed bug with iterator when multithreading and exhausting the iterator.
+* Enforced values being converted to a primitive to be of length 1.
+
# RcppAlgos 2.8.3
## New Features:
diff --git a/README.md b/README.md
index 22fc5b4..52d8466 100644
--- a/README.md
+++ b/README.md
@@ -2,13 +2,13 @@
# RcppAlgos
-[![R build status]()]()
[![CRAN status]()]()
![]()
![]()
-[![Coverage status]()]()
[![Codacy Badge]()]()
[![Dependencies]()]()
+[![R-CMD-check](https://github.com/jwood000/RcppAlgos/actions/workflows/R-CMD-check.yml/badge.svg)](https://github.com/jwood000/RcppAlgos/actions/workflows/R-CMD-check.yml)
+[![Codecov test coverage](https://codecov.io/gh/jwood000/RcppAlgos/branch/main/graph/badge.svg)](https://app.codecov.io/gh/jwood000/RcppAlgos?branch=main)
A collection of high performance functions and iterators implemented in C++ for solving problems in combinatorics and computational mathematics.
diff --git a/SO_Answers/Calculate : Translate vector of numbers in binary matrix : vector in R.R b/SO_Answers/Calculate : Translate vector of numbers in binary matrix : vector in R.R
new file mode 100644
index 0000000..9fe7847
--- /dev/null
+++ b/SO_Answers/Calculate : Translate vector of numbers in binary matrix : vector in R.R
@@ -0,0 +1,20 @@
+reprex::reprex({
+ #' Here is a one-liner using `RcppAlgos` (I am the author):
+
+ vector <- c(1,2,3)
+
+ library(RcppAlgos)
+ toBinary <- function(v) permuteGeneral(0:1, length(v), TRUE)[-1,]
+
+ toBinary(vector)
+
+ #' The `[-1, ]` is to remove the row of all zeros. This row would represent the empty set in a [power set](https://en.wikipedia.org/wiki/Power_set). In fact, what you are asking for is technically a mapping from the power set of a vector (minus the empty set of course) to a binary matrix.
+ #'
+ #' If you really want the `row.names` to be the actual permutations, you can use the `powerSet` function from the `rje` package. Observe:
+
+ library(rje)
+ nameTest <- toBinary(vector)
+ row.names(nameTest) <- lapply(powerSet(rev(vector))[-1], sort)
+
+ nameTest
+}, advertise = FALSE, venue = "r", html_preview = FALSE, wd = ".")
\ No newline at end of file
diff --git a/SO_Answers/Combination of N elements with q elements in R.R b/SO_Answers/Combination of N elements with q elements in R.R
new file mode 100644
index 0000000..f61fac0
--- /dev/null
+++ b/SO_Answers/Combination of N elements with q elements in R.R
@@ -0,0 +1,34 @@
+reprex::reprex({
+ #' These are permutations of multisets. Currently there are 4 packages on CRAN that can handle such tasks. They are: `partitions`, `multicool`, `arrangements` (which as @StéphaneLaurent points out is a replacement for `iterpc`), and `RcppAlgos` (I am the author).
+ #'
+ #' Both `arrangements` and `RcppAlgos` produce results in lexicographical order, producing a `matrix` where each row is a different permutation. Both of these packages offer access to memory efficient iterators for attacking larger cases.
+
+ RcppAlgos::permuteGeneral(3, freqs = c(1, 2, 1))
+
+ ## Or using S3 method for class 'table'
+ RcppAlgos::permuteGeneral(table(c(1, 2, 2, 3)))
+
+ #' The package `multicool` produces permutations row by row in a matrix in coolex order which is similar to [colexicographical order](https://en.wikipedia.org/wiki/Colexicographical_order). This package also offers iterators via `nextPerm`.
+
+ multicool::allPerm(multicool::initMC(c(1, 2, 2, 3)))
+
+ #' Finally, `partitions::multiset` outputs a `partitions` objects whereby each column represents a new permutation. It is based off of Knuth's algorithm as outlined in the [The Art of Computer Programming](https://en.wikipedia.org/wiki/The_Art_of_Computer_Programming).
+
+ partitions::multiset(c(1, 2, 2, 3))
+
+ #' Here are some benchmarks:
+
+ library(microbenchmark)
+ options(digits = 4)
+
+ microbenchmark(
+ arrangements = arrangements::permutations(x = 0:3, freq = 5:2),
+ multicool = multicool::allPerm(multicool::initMC(rep(0:3, times = 5:2))),
+ partitions = partitions::multiset(rep(0:3, times = 5:2)),
+ RcppAlgos = RcppAlgos::permuteGeneral(0:3, freqs = 5:2),
+ unit = "relative",
+ times = 40
+ )
+
+ #' For more information regarding problems like this in `R`, I wrote an [extensive overview](https://stackoverflow.com/a/47983855/4408538) to the question : [How to generate permutations or combinations of object in R?](https://stackoverflow.com/q/22569176/4408538).
+}, advertise = FALSE, venue = "r", html_preview = FALSE, wd = ".")
diff --git a/SO_Answers/Constrained matrices.R b/SO_Answers/Constrained matrices.R
new file mode 100644
index 0000000..c3d036d
--- /dev/null
+++ b/SO_Answers/Constrained matrices.R
@@ -0,0 +1,139 @@
+reprex::reprex({
+ #' The OP is looking for all integer partitions of the number 100 of maximal length of 25. The package `partitions` is equipped with a function exactly for this purpose called `restrictedparts`. E.g.:
+
+ library(partitions)
+
+ ## Keep the output tidy
+ options(digits = 4)
+ options(width = 90)
+
+ ## all integer partitions of 10 of maximal length = 4
+ restrictedparts(10, 4)
+
+ #' Once all of the them have been generated, simply create a 5x5 matrix of each combinations (`restrictedparts` doesn't differentiate between `0 0 3` and `0 3 0`). The only problem is that there are so many possible combinations (`partitions::R(25, 100, TRUE) = 139620591`) the function throws an error when you call `restrictedparts(100, 25)`.
+
+ test <- restrictedparts(100, 25)
+
+ #' Since we can't generate them all via `restrictedparts`, we can generate them individually using `firstrestrictedpart` along with `nextrestrictedpart` like so:
+
+ funPartition <- function(p, n) {
+ mat <- matrix(nrow = 25, ncol = n)
+ mat[, 1] <- p
+
+ for (i in 2:n) {
+ p <- nextrestrictedpart(p)
+ mat[, i] <- p
+ }
+
+ mat
+ }
+
+ head(funPartition(firstrestrictedpart(100, 25), 5))
+
+ #' The only problem here is that the iterators aren't as efficient due continuously copying.
+ #'
+ #' ## **Enter RcppAlgos**
+ #'
+ #' There is a faster way using the package `RcppAlgos` (I am the author). Similar to the `partitions` package, there is a function, `partitionsGeneral`, for generating all of partitions.
+
+ library(RcppAlgos)
+ ## Target is implicitly set to 100 below. For different targets, explicitly
+ ## set the target parameter. E.g.:
+ ##
+ ## partitionsGeneral(0:100, 25, TRUE, target = 200, upper = 10^5)
+ ##
+ ## Will generate the first 10^5 partitions of 200 using the vector 0:100
+
+ matrixParts <- apply(
+ partitionsGeneral(0:100, 25, repetition = TRUE, upper = 10^5),
+ MARGIN = 1, \(x) matrix(x, ncol = 5), simplify = FALSE
+ )
+
+ all(sapply(matrixParts, sum) == 100)
+
+ matrixParts[c(1, 90, 10^5)]
+
+ #' ### A Better Approach: Iterators
+ #'
+ #' There are memory efficient iterators available as well for many topics in combinatorics including integer partitions (E.g. `partitionsIter`).
+ #'
+ #' Using iterators, we could create a helper function that could transform each result to our desired matrix.
+
+ matFromIter <- function(it, ncol = 5L) {
+ matrix(it@nextIter(), ncol = ncol)
+ }
+
+ ## Initialize partitions iterator
+ it <- partitionsIter(0:100, 25, repetition = TRUE)
+ ## Get the first 3 results
+ replicate(3, matFromIter(it))
+ ## Get 2 more picking up where we left off above
+ replicate(2, matFromIter(it))
+ ## Reset iterator
+ it@startOver()
+ ## Get random lexicographical result using the method: `[[`
+ matrix(it[[1e6]], ncol = 5)
+ ## Get the last one
+ matrix(it@back(), ncol = 5)
+
+ #' ### Need Permutations?
+ #'
+ #' If you really want permutations, no problem, just call `compositionsGeneral`:
+
+ matrixComps <- apply(
+ compositionsGeneral(0:100, 25, repetition = TRUE, upper = 10^5),
+ MARGIN = 1, \(x) matrix(x, ncol = 5), simplify = FALSE
+ )
+
+ all(sapply(matrixComps, sum) == 100)
+
+ ## Compare to the output of matrixCombs
+ matrixComps[c(1, 90, 10^5)]
+
+ #' ### Random Sampling
+ #'
+ #' Since the number of results is so massive, sampling may be our best option. Consider how many total results we are dealing with:
+
+ partitionsCount(0:100, 25, TRUE)
+
+ compositionsCount(0:100, 25, TRUE)
+
+ #' We can use either `partitionsSample` or `compositionsSample` to quickly generate a candidates that can be transformed into the desired matrix output.
+
+ ## Optional, use seed parameter for reproducibility
+ apply(partitionsSample(0:100, 25, TRUE, n = 3, seed = 42), 1, \(x) {
+ matrix(x, ncol = 5)
+ }, simplify = FALSE)
+
+ apply(compositionsSample(0:100, 25, TRUE, n = 3, seed = 28), 1, \(x) {
+ matrix(x, ncol = 5)
+ }, simplify = FALSE)
+
+ #' ### Efficiency
+ #'
+ #' All of the functions are written in `C++` for ultimate efficiency. Consider iterating over 10,000 partitions.
+
+ library(microbenchmark)
+ pkg_partitions <- function(n, k, total) {
+ a <- firstrestrictedpart(n, k)
+ for (i in 1:(total - 1)) a <- nextrestrictedpart(a)
+ }
+
+ pkg_RcppAlgos <- function(n, k, total) {
+ a <- partitionsIter(0:n, k, repetition = TRUE)
+ for (i in 1:total) a@nextIter()
+ }
+
+ microbenchmark(cbRcppAlgos = pkg_RcppAlgos(100, 25, 10^4),
+ cbPartitions = pkg_partitions(100, 25, 10^4),
+ times = 25, unit = "relative")
+
+ #' And generating 10^5 random samples takes no time, especially when using multiple threads:
+
+ system.time(partitionsSample(0:100, 25, TRUE, nThreads = 6,
+ n = 1e5, seed = 42))
+
+ system.time(compositionsSample(0:100, 25, TRUE, nThreads = 6,
+ n = 1e5, seed = 28))
+
+}, advertise = FALSE, venue = "r", html_preview = FALSE, wd = ".")
diff --git a/SO_Answers/Estimate combinations in panel data in r?.R b/SO_Answers/Estimate combinations in panel data in r?.R
new file mode 100644
index 0000000..7977826
--- /dev/null
+++ b/SO_Answers/Estimate combinations in panel data in r?.R
@@ -0,0 +1,107 @@
+reprex::reprex({
+ #' Update
+ #' ---
+ #' Here is a solution that is a good starting place, but could probably be improved:
+
+ library(RcppAlgos)
+ getCombs <- function(myMat, upper = NULL, minYears = NULL) {
+
+ numRows <- nrow(myMat)
+ myColNames <- colnames(myMat)
+
+ if (is.null(minYears)) ## set default
+ repZero <- numRows - 1
+ else if (minYears >= numRows || minYears < 1) ## check for extreme cases
+ repZero <- numRows - 1
+ else
+ repZero <- numRows - minYears
+
+ combs <- comboGeneral(v = 0:numRows, m = numRows,
+ freqs = c(repZero, rep(1, numRows)),
+ upper = upper)
+
+ ## I think this part could be improved
+ out <- lapply(1:nrow(combs), \(x) {
+ myRows <- myMat[combs[x,],]
+
+ if (is.null(nrow(myRows)))
+ result <- !is.na(myRows)
+ else
+ result <- complete.cases(t(myRows))
+
+ myColNames[result]
+ })
+
+ myRowNames <- rownames(myMat)
+ names(out) <- lapply(1:nrow(combs), \(x) {
+ myRowNames[combs[x, combs[x, ] > 0]]
+ })
+ out
+ }
+
+ #' Here is the output for the OP's example. (The OP is missing 5 of the outcomes below):
+
+ testMat <- matrix(
+ c(NA, 50, NA, 85,
+ 110, 75, 76, 86,
+ 120, NA, 78, 87,
+ 130, 100, 80, 88),
+ nrow = 4, byrow = TRUE
+ )
+
+ row.names(testMat) <- 2000:2003
+ colnames(testMat) <- LETTERS[1:4]
+ getCombs(testMat)
+
+ #' However, this answer, or any future answer for that matter, won't get you every combination as you have 144 countries and 47 years of data. That produces a very _very_ large number. Every combination of any length up to _n_ is equivalent to the [power set](https://en.wikipedia.org/wiki/Power_set). The number of elements in the power set is simply `2^n`. Since we are not counting the equivalent of the empty set, we need to subtract one, thus:
+
+ suppressWarnings(suppressMessages(library(gmp)))
+ sub.bigz(pow.bigz(2, 47), 1)
+
+ #' Yes, that is over one hundred trillion!!! You will probably need to rethink your approach as there are simply too many outcomes.
+ #'
+ #' All is not lost! You can make use of the `upper` argument, to limit the number of outcomes, so you can still investigate possible combinations. Observe:
+
+ set.seed(11111)
+ biggerTest <- matrix(sample(100, 20*20, replace = TRUE), nrow = 20)
+
+ colnames(biggerTest) <- LETTERS[1:20]
+ rownames(biggerTest) <- 1988:2007
+
+ ## set 10% of values to NA
+ myNAs <- sample(400, 400 / 10)
+ biggerTest[myNAs] <- NA
+
+ biggerTest[1:6, 1:10]
+
+ ## Getting all 1,048,575 results takes a good bit of time
+ system.time(allResults <- getCombs(biggerTest))
+
+ ## Using upper greatly reduces the amount of time
+ system.time(smallSampTest <- getCombs(biggerTest, upper = 10000))
+
+ #' Alternatively, you can use the `minYears` argument to only return results with a minimum number of combinations of years. For example, per the OP's comments to @CPak's answer, if you only want to see results with 15 or more years of combinations, we have:
+
+ system.time(minYearTest <- getCombs(biggerTest, minYears = 15))
+
+ set.seed(123)
+ minYearTest[sample(length(minYearTest), 5)]
+
+ #' Or use both arguments together:
+
+ system.time(
+ bothConstraintsTest <- getCombs(biggerTest, 10000, minYears = 10)
+ )
+
+ bothConstraintsTest[1:5]
+
+ #' Explanation
+ #' ===
+ #' The first thing we need to do is to determine every combination of _n_ years. This boils down to finding all _n_-tuples of the [multiset](https://en.wikipedia.org/wiki/Multiset) `c(rep(0, n-1), 1:n)` or equivalently, the power set of an _n_ element set minus the empty set. For example, for the years `2000:2003` (4 year span), the possible combinations are given by:
+
+ comboGeneral(v = 0:4, m = 4, freqs = c(3, rep(1, 4)))
+
+ #' Now, we iterate over each row of our combinations where each row tells us which combination of rows from the original matrix to test for `NAs`. If the particular combination only contains one result, we determine which indices are not `NA`. That is easily carried out by `!is.na(`. If we have more than one row, we employ `complete.cases(t` to obtain the columns that have only numbers (i.e. no occurrences of `NA`).
+ #'
+ #' After this we are just using indexing to obtain names for our outcomes and Voila, we have our desired results.
+}, advertise = FALSE, venue = "r", html_preview = FALSE, wd = ".")
diff --git a/SO_Answers/Generate a list of primes up to a certain number.R b/SO_Answers/Generate a list of primes up to a certain number.R
new file mode 100644
index 0000000..2f1461e
--- /dev/null
+++ b/SO_Answers/Generate a list of primes up to a certain number.R
@@ -0,0 +1,266 @@
+reprex::reprex({
+
+ #' ## Prime Numbers in R
+ #'
+ #' The OP asked to generate all prime numbers below one billion. All of the answers provided thus far are either not capable of doing this, will take a long a time to execute, or currently not available in R (see the [answer](https://stackoverflow.com/a/3790109/4408538) by @Charles). The package `RcppAlgos` (I am the author) is capable of generating the requested output in just under `1 second` using only one thread. It is based off of the segmented sieve of Eratosthenes by [Kim Walisch](https://github.com/kimwalisch/primesieve).
+ #'
+ #' ### `RcppAlgos`
+
+ ser <- system.time(RcppAlgos::primeSieve(1e9)) ## using 1 thread
+ ser
+
+ #' ### Using Multiple Threads
+ #'
+ #' And in recent versions (i.e. `>= 2.3.0`), we can utilize multiple threads for even faster generation. For example, now we can generate the primes up to **1 billion in under a quarter of a second!**
+
+ par <- system.time(RcppAlgos::primeSieve(10^9, nThreads = 8))
+ par
+
+ #' ### Summary of Available Packages in R for Generating Primes
+ #'
+ #' 1. `schoolmath`
+ #' 2. `primefactr`
+ #' 3. `sfsmisc`
+ #' 4. `primes`
+ #' 5. `numbers`
+ #' 6. `spuRs`
+ #' 7. `randtoolbox`
+ #' 8. `matlab`
+ #' 9. `sieve` from @John (see below.. we removed `if(n > 1e8) stop("n too large")`)
+
+ sieve <- function(n) {
+ n <- as.integer(n)
+ primes <- rep(TRUE, n)
+ primes[1] <- FALSE
+ last.prime <- 2L
+ fsqr <- floor(sqrt(n))
+ while (last.prime <= fsqr) {
+ primes[seq.int(2L*last.prime, n, last.prime)] <- FALSE
+ sel <- which(primes[(last.prime+1):(fsqr+1)])
+ if(any(sel)){
+ last.prime <- last.prime + min(sel)
+ }else last.prime <- fsqr+1
+ }
+ which(primes)
+ }
+
+ #' Before we begin, we note that the problems pointed out by @Henrik in `schoolmath` still exists. Observe:
+
+ ## 1 is NOT a prime number
+ schoolmath::primes(start = 1, end = 20)
+
+ ## This should return 1, however it is saying that 52
+ ## "prime" numbers less than 10^4 are divisible by 7!!
+ sum(schoolmath::primes(start = 1, end = 10^4) %% 7L == 0)
+
+ #' The point is, don't use `schoolmath` for generating primes at this point (I have filed an issue with the maintainer).
+ #'
+ #' Let's look at `randtoolbox` as it appears to be incredibly efficient. Observe:
+
+ library(microbenchmark)
+ options(digits = 4)
+ options(width = 90)
+
+ ## the argument for get.primes is for how many prime numbers you need
+ ## whereas most packages get all primes less than a certain number
+ microbenchmark(randtoolbox = randtoolbox::get.primes(78498),
+ RcppAlgos = RcppAlgos::primeSieve(10^6),
+ unit = "relative")
+
+ #' A closer look reveals that it is essentially a lookup table (found in the file `randtoolbox.c` from the source code).
+
+ #include "primes.h"
+ #
+ # void reconstruct_primes()
+ # {
+ # int i;
+ # if (primeNumber[2] == 1)
+ # for (i = 2; i < 100000; i++)
+ # primeNumber[i] = primeNumber[i-1] + 2*primeNumber[i];
+ # }
+
+ #' Where `primes.h` is a header file that contains an array of _"halves of differences between prime numbers"_. Thus, you will be limited by the number of elements in that array for generating primes (i.e. the first one hundred thousand primes). If you are only working with smaller primes (less than `1,299,709` (i.e. the 100,000th prime)) and you are working on a project that requires the `nth` prime, `randtoolbox` is not a bad option.
+ #'
+ #' Below, we perform benchmarks on the rest of the packages.
+ #'
+ #' ### Primes up to One Million
+
+ million <- microbenchmark(
+ RcppAlgos = RcppAlgos::primeSieve(10^6),
+ numbers = numbers::Primes(10^6),
+ spuRs = spuRs::primesieve(c(), 2:10^6),
+ primes = primes::generate_primes(1, 10^6),
+ primefactr = primefactr::AllPrimesUpTo(10^6),
+ sfsmisc = sfsmisc::primes(10^6),
+ matlab = matlab::primes(10^6),
+ JohnSieve = sieve(10^6),
+ unit = "relative"
+ )
+
+ million
+
+ #' ### Primes up to Ten Million
+
+ ten_million <- microbenchmark(
+ RcppAlgos = RcppAlgos::primeSieve(10^7),
+ numbers = numbers::Primes(10^7),
+ spuRs = spuRs::primesieve(c(), 2:10^7),
+ primes = primes::generate_primes(1, 10^7),
+ primefactr = primefactr::AllPrimesUpTo(10^7),
+ sfsmisc = sfsmisc::primes(10^7),
+ matlab = matlab::primes(10^7),
+ JohnSieve = sieve(10^7),
+ unit = "relative",
+ times = 20
+ )
+
+ ten_million
+
+ #' ### Primes up to One Hundred Million
+ #'
+ #' For the next two benchmarks, we only consider `RcppAlgos`, `sfsmisc`, `primes`, and the `sieve` function by @John.
+
+ hundred_million <- microbenchmark(
+ RcppAlgos = RcppAlgos::primeSieve(10^8),
+ sfsmisc = sfsmisc::primes(10^8),
+ primes = primes::generate_primes(1, 10^8),
+ JohnSieve = sieve(10^8),
+ unit = "relative",
+ times = 20
+ )
+
+ hundred_million
+
+ #' ### Primes up to One Billion
+ #'
+ #' N.B. We must remove the condition `if(n > 1e8) stop("n too large")` in the `sieve` function.
+
+ ## See top section
+ ## system.time(primeSieve(10^9))
+
+ invisible(gc())
+ pm_1e9 <- system.time(primes::generate_primes(1, 10^9))
+ pm_1e9
+
+ invisible(gc())
+ sieve_1e9 <- system.time(sieve(10^9))
+ sieve_1e9
+
+ invisible(gc())
+ sfs_1e9 <- system.time(sfsmisc::primes(10^9))
+ sfs_1e9
+
+ #' From these comparison, we see that `RcppAlgos` scales much better as _n_ gets larger.
+
+ suppressWarnings(suppressMessages(library(dplyr)))
+ suppressWarnings(suppressMessages(library(purrr)))
+
+ billion <- tibble(
+ expr = c("RcppAlgos", "primes", "sfsmisc", "JohnSieve"),
+ time = c(ser["elapsed"], pm_1e9["elapsed"],
+ sfs_1e9["elapsed"], sieve_1e9["elapsed"])
+ )
+
+ my_scale <- \(x) x / min(x, na.rm = TRUE)
+
+ time_table <- map(
+ list(million, ten_million, hundred_million, billion),
+ ~ .x %>% group_by(expr) %>% summarise(med = median(time))
+ ) %>%
+ reduce(left_join, by = join_by(expr)) %>%
+ rename(`1e6` = 2, `1e7` = 3, `1e8` = 4, `1e9` = 5) %>%
+ mutate(across(2:5, ~ my_scale(.x)))
+
+ knitr::kable(
+ time_table %>%
+ mutate(across(2:5, ~ round(my_scale(.x), 2)))
+ )
+
+ #' The difference is even more dramatic when we utilize multiple threads:
+
+ algos_time <- list(
+ algos_1e6 = microbenchmark(
+ ser = RcppAlgos::primeSieve(1e6),
+ par = RcppAlgos::primeSieve(1e6, nThreads = 8), unit = "relative"
+ ),
+ algos_1e7 = microbenchmark(
+ ser = RcppAlgos::primeSieve(1e7),
+ par = RcppAlgos::primeSieve(1e7, nThreads = 8), unit = "relative"
+ ),
+ algos_1e8 = microbenchmark(
+ ser = RcppAlgos::primeSieve(1e8),
+ par = RcppAlgos::primeSieve(1e8, nThreads = 8),
+ unit = "relative", times = 20
+ ),
+ algos_1e9 = microbenchmark(
+ ser = RcppAlgos::primeSieve(1e9),
+ par = RcppAlgos::primeSieve(1e9, nThreads = 8),
+ unit = "relative", times = 10
+ )
+ )
+
+ ser_vs_par <- map(
+ algos_time, ~ .x %>% group_by(expr) %>% summarise(med = median(time))
+ ) %>%
+ reduce(left_join, by = join_by(expr)) %>%
+ rename(`1e6` = 2, `1e7` = 3, `1e8` = 4, `1e9` = 5) %>%
+ mutate(across(2:5, ~ my_scale(.x))) %>%
+ filter(expr == "ser") %>%
+ unlist()
+
+ #' And multiplying the table above by the respective median times for the serial results:
+
+ time_parallel <- bind_cols(
+ expr = time_table[, 1],
+ map(2:5, \(x) time_table[, x] * ser_vs_par[x]),
+ ) %>%
+ add_row(expr = "RcppAlgos-Par",
+ `1e6` = 1, `1e7` = 1, `1e8` = 1, `1e9` = 1) %>%
+ arrange(`1e6`)
+
+ knitr::kable(
+ time_parallel %>%
+ mutate(across(2:5, ~ round(my_scale(.x), 2)))
+ )
+
+ #' ### Primes Over a Range
+
+ microbenchmark(priRcppAlgos = RcppAlgos::primeSieve(10^9, 10^9 + 10^6),
+ priNumbers = numbers::Primes(10^9, 10^9 + 10^6),
+ priPrimes = primes::generate_primes(10^9, 10^9 + 10^6),
+ unit = "relative", times = 20)
+
+ #' ### Primes up to 10 billion in Under 3 Seconds
+
+ ## primes less than 10 billion
+ system.time(tenBillion <- RcppAlgos::primeSieve(10^10, nThreads = 8))
+
+ length(tenBillion)
+
+ ## Warning!!!... Large object created
+ tenBillionSize <- object.size(tenBillion)
+ print(tenBillionSize, units = "Gb")
+
+ rm(tenBillionSize)
+ invisible(gc())
+
+ #' ### Primes Over a Range of Very Large Numbers:
+ #'
+ #' Prior to version `2.3.0`, we were simply using the same algorithm for numbers of every magnitude. This is okay for smaller numbers when most of the sieving primes have at least one multiple in each segment (Generally, the segment size is limited by the size of `L1 Cache ~32KiB`). However, when we are dealing with larger numbers, the sieving primes will contain many numbers that will have fewer than one multiple per segment. This situation creates a lot of overhead, as we are performing many worthless checks that pollutes the cache. Thus, we observe much slower generation of primes when the numbers are very large. If you want to test yourself, see [Installing older version of R package](https://stackoverflow.com/a/17082609/4408538)).
+ #'
+ #' In later versions (>= `2.3.0`), we are using the cache friendly improvement originally developed by [Tomás Oliveira](). The improvements are drastic:
+
+ ## Over 3x faster than older versions
+ system.time(cacheFriendly <- RcppAlgos::primeSieve(1e15, 1e15 + 1e9))
+
+ ## Over 8x faster using multiple threads
+ system.time(RcppAlgos::primeSieve(1e15, 1e15 + 1e9, nThreads = 8))
+
+ #' ### Take Away
+ #'
+ #' 1. There are many great packages available for generating primes
+ #' 2. If you are looking for speed in general, there is no match to `RcppAlgos::primeSieve`, especially for larger numbers.
+ #' 3. If you need primes in a range, the packages `numbers`, `primes`, & `RcppAlgos` are the way to go.
+ #' 4. The importance of good programming practices cannot be overemphasized (e.g. vectorization, using correct data types, etc.). This is most aptly demonstrated by the pure base R solution provided by @John. It is concise, clear, and very efficient.
+
+}, advertise = FALSE, venue = "r", html_preview = FALSE, wd = ".")
\ No newline at end of file
diff --git a/SO_Answers/Generate permutations of specific length with repetition in R?.R b/SO_Answers/Generate permutations of specific length with repetition in R?.R
new file mode 100644
index 0000000..edc3d89
--- /dev/null
+++ b/SO_Answers/Generate permutations of specific length with repetition in R?.R
@@ -0,0 +1,49 @@
+reprex::reprex({
+ #' There are several packages that will produce exactly what you need. Let's start with the classic `gtools`. Also, from the looks of the example provided by the OP, we are looking for permutations without repetition, not combinations with repetition.
+
+ wordsList <- c("alice", "moon", "walks", "mars", "sings", "guitar", "bravo")
+
+ library(gtools)
+ attempt1 <- permutations(length(wordsList), 3, wordsList)
+ head(attempt1)
+
+ #' Then there is `arrangements`:
+
+ library(arrangements)
+ attempt2 <- permutations(wordsList, 3)
+ head(attempt2)
+
+ #' And finally, `RcppAlgos` (I am the author):
+
+ library(RcppAlgos)
+ attempt3 <- permuteGeneral(wordsList, 3)
+
+ #' They are all fairly efficient and produce similar outcomes (different orderings)
+
+ identical(attempt1[do.call(order,as.data.frame(attempt1)),],
+ attempt2[do.call(order,as.data.frame(attempt3)),])
+
+ identical(attempt2, attempt3)
+
+ #' If you really want permutations with repetition, each function provides an argument for carrying out that function. For `gtools` set `repeats.allowed = TRUE`, for `arrangments` set `replace = TRUE`, and finally for `RcppAlgos` set `repetition = TRUE`.
+ #'
+ #' Since the OP is working with a `wordsList` with many words and is looking for all permutations chosen 15 at a time, the aforementioned methods will fail. However, there are some alternatives from `arrangements` as well as `RcppAlgos` that _may_ help.
+ #'
+ #' With `arrangements` you can use the function `getnext` and produce successive permutations. Generating them all will still take quite a long time.
+ #'
+ #' As with `arrangements`, `RcppAlgos` provides access to memory efficient iterators. They are quite flexible and very fast.
+
+ it <- permuteIter(wordsList, 3)
+
+ ## start iterating
+ it@nextIter()
+
+ ## draw n at a time
+ it@nextNIter(n = 3)
+
+ ## get the last one
+ it@back()
+
+ ## iterate in reverse
+ it@prevIter()
+}, advertise = FALSE, venue = "r", html_preview = FALSE, wd = ".")
\ No newline at end of file
diff --git a/SO_Answers/How can I obtain some of all possible combinations in R?.R b/SO_Answers/How can I obtain some of all possible combinations in R?.R
new file mode 100644
index 0000000..e43c0a2
--- /dev/null
+++ b/SO_Answers/How can I obtain some of all possible combinations in R?.R
@@ -0,0 +1,114 @@
+reprex::reprex({
+ #' It sounds like the OP is describing an [iterator](https://en.wikipedia.org/wiki/Iterator#:~:text=In%20computer%20programming%2C%20an%20iterator,via%20a%20container's%20interface.). There is a package called `iterators` that seems promising, however a major drawback is that one needs to generate the object upfront in order to iterate. That does not help us in the OP's case where we are trying to avoid generating the entire object upfront.
+ #'
+ #' There is a package `RcppAlgos` (I am the author) that provides flexible combinatorial iterators that allow one to traverse forwards, backwards, and even allows random access via `[[`. The underlying algorithms are written in `C++` for maximum efficiency. Results are produces on the fly, which keeps memory usage in check, all while preserving the state.
+
+ library(RcppAlgos)
+ it <- comboIter(25, 5)
+
+ ## Get the first iteration
+ it@nextIter()
+
+ ## See the current state
+ it@summary()
+
+ ## Get the next 7 iterations
+ it@nextNIter(n = 7)
+
+ ## See the current state
+ it@summary()
+
+ ## Go back to the previous iteration
+ it@prevIter()
+
+ ## Skip ahead to the 20000th iteration instantly
+ it[[20000]]
+
+ ## See the current state. Notice we are at 20000
+ it@summary()
+
+ ## Start iterating from there
+ it@nextIter()
+
+ ## Reset the iterator
+ it@startOver()
+
+ ## Results are identical
+ identical(
+ replicate(choose(25, 5), it@nextIter(), simplify = "array"),
+ combn(25, 5)
+ )
+
+ #' These iterators are very efficient. Even with all of the communication back and forth between `R` and `C++`, iterating "one at a time" is almost as fast as `combn` generating all of them upfront.
+
+ library(microbenchmark)
+ options(digits = 4)
+ options(width = 90)
+
+ ## helper functions for resetting the iterator
+ one_at_a_time <- function() {
+ it@startOver()
+ replicate(choose(25, 5), it@nextIter())
+ }
+
+ microbenchmark(
+ RcppAlgos_single = one_at_a_time(),
+ combn_all = combn(25, 5),
+ unit = "relative"
+ )
+
+ #' Even better, when we shift from "one at a time" to just a few at a time, the speed up is substantial. Observe:
+
+ multiple_at_a_time <- function(n) {
+ it@startOver()
+ replicate(choose(25, 5) / n, it@nextNIter(n = n))
+ }
+
+ microbenchmark(
+ RcppAlgos_chunks = multiple_at_a_time(30),
+ combn_all = combn(25, 5),
+ unit = "relative"
+ )
+
+ #' We can even evaluate each combination on the fly using the `FUN` parameter:
+
+ ## the FUN.VALUE parameter is optional. If NULL (the default), when
+ ## multiple results are requested via nextNIter, prevNIter, nextRemaining,
+ ## and prevRemaining, a list will be returned. This parameter is modeled
+ ## after the usage in vapply
+ it_fun <- comboIter(25, 5, FUN = cumprod, FUN.VALUE = cumprod(1:5))
+
+ it_fun@nextIter()
+
+ it_fun@nextNIter(n = 2)
+
+ ## See the previous results in reverse order
+ it_fun@prevRemaining()
+
+ #' Finally, the `[[` method allows for random access of a single result or multiple results:
+
+ ## As seen above
+ it[[20000]]
+
+ ## Pass a random sample. N.B. In this case the state is unaffected. That
+ ## is, it will remain whatever it was prior to passing the vector. In our
+ ## case, it will still be on the 20000 index.
+ set.seed(42)
+ it[[sample(choose(25, 5), 10)]]
+
+ ## State unaffected
+ it@summary()
+
+ ## Sample with replacement
+ it[[sample(choose(25, 5), 5, replace = TRUE)]]
+
+ #' There are also combinatorial sampling functions in `RcppAlgos`. For our current case, we would call upon `comboSample`.
+
+ ## Same as above:
+ ## set.seed(42)
+ ## it[[sample(choose(25, 5), 10)]]
+ comboSample(25, 5, seed = 42, n = 10)
+
+ #' See my answer to [How to resample in R without repeating permutations?](https://stats.stackexchange.com/a/647976/139295) for more info.
+
+}, advertise = FALSE, venue = "r", html_preview = FALSE, wd = ".")
\ No newline at end of file
diff --git a/scripts/SO_Comb_Perm_in_R.R b/SO_Answers/How to generate permutations or combinations of object in R?.R
similarity index 100%
rename from scripts/SO_Comb_Perm_in_R.R
rename to SO_Answers/How to generate permutations or combinations of object in R?.R
diff --git a/SO_Answers/How to resample in R without repeating permutations?.R b/SO_Answers/How to resample in R without repeating permutations?.R
new file mode 100644
index 0000000..7d1b733
--- /dev/null
+++ b/SO_Answers/How to resample in R without repeating permutations?.R
@@ -0,0 +1,110 @@
+reprex::reprex({
+ #' The package `RcppAlgos` (I am the author) has some functions specifically for this type of problem. Observe:
+
+ library(RcppAlgos)
+ permuteSample(v = 0:10, m = 5, seed = 123, n = 3)
+
+ ## Or use global RNG to get the same results
+ set.seed(123)
+ permuteSample(v = 0:10, m = 5, n = 3)
+
+ #' They are guaranteed to sample **without** replacement:
+
+ permuteCount(3)
+ permuteSample(3, n = 7)
+
+ #' If we request the maximal number of samples, it will be equivalent to generating all results and shuffling:
+
+ ps <- permuteSample(3, n = 6, seed = 100)
+
+ ## Shuffled
+ ps
+
+ ## Same as getting them all
+ identical(
+ permuteGeneral(3),
+ ps[do.call(order, as.data.frame(ps)), ]
+ )
+
+ #' ## Flexibility
+ #'
+ #' These functions are quite general as well. For example, if we want to sample a vector where repetition _within the vector_ is allowed set `repetition = TRUE` (not to be confused with sampling with replacement):
+
+ permuteSample(v = 0:10, m = 5, repetition = TRUE, seed = 42, n = 3)
+
+ ## Go beyond the vector length
+ permuteSample(v = 3, m = 10, repetition = TRUE, seed = 42, n = 3)
+
+ #' What about specific multiplicity for each element... easy! Use the `freqs` parameter:
+
+ ## With v = 3 and freqs = 2:4, this means:
+ ##
+ ## 1 can occur a maximum of 2 times
+ ## 2 can occur a maximum of 3 times
+ ## 3 can occur a maximum of 4 times
+ ##
+ permuteSample(v = 3, m = 6, freqs = 2:4, seed = 1234, n = 3)
+
+ #' It works on all atomic types as well:
+
+ ## When m is NULL, the length defaults to the length of v
+ set.seed(28)
+ permuteSample(
+ v = rnorm(3) + rnorm(3) * 1i,
+ repetition = TRUE,
+ n = 5
+ )
+
+ #' Sampling With Replacement
+ #'
+ #' The default behavior of the sampling functions in `RcppAlgos` is to sample without replacement. If you need sampling with replacement, we can make use of the `sampleVec` parameter. This parameter takes a vector and interprets them as indices representing the lexicographical permutations to return. For example:
+
+ permuteSample(3, sampleVec = c(1, 3, 6))
+
+ ## Same as
+ permuteGeneral(3)[c(1, 3, 6), ]
+
+ #' This means we can generate our own random sample of indices and pass it to `permuteSample`:
+
+ set.seed(123)
+ idx <- sample(permuteCount(3), 5, replace = TRUE)
+ idx
+
+ permuteSample(3, sampleVec = idx)
+
+ ## See rownames corresponding to the lexicographical result
+ permuteSample(3, sampleVec = idx, namedSample = TRUE)
+
+ #' ## Efficiency
+ #'
+ #' The functions are written in `C++` with efficiency in mind. Take for example the largest case @whuber tests:
+
+ system.time(size_10 <- permuteSample(10, n = 1e6, seed = 42))
+
+ ## Guaranteed no repeated permutations!
+ nrow(unique(size_10))
+
+ system.time(size_15 <- permuteSample(15, n = 1e6, seed = 42))
+
+ ## Guaranteed no repeated permutations!
+ nrow(unique(size_15))
+
+ #' Why not try even larger cases?!?!?! Using `nThreads`, we can achieve even greater efficiency:
+
+ ## Single threaded
+ system.time(permuteSample(15, n = 1e7, seed = 321))
+
+ ## Using 4 threads
+ system.time(permuteSample(15, n = 1e7, seed = 321, nThreads = 4))
+
+ #' ## Sampling Other Combinatorial Objects
+ #'
+ #' There are also sampling functions for other combinatorial objects:
+ #'
+ #' 1. permutations: `permuteSample`
+ #' 2. combinations: `comboSample`
+ #' 3. integer partitions: `partitionsSample`
+ #' 4. integer compositions: `compositionsSample`
+ #' 5. partitions of a vector into groups `comboGroupsSample`
+
+}, advertise = FALSE, venue = "r", html_preview = FALSE, wd = ".")
diff --git a/SO_Answers/Optimizing calculation of combinations into list - large data set.R b/SO_Answers/Optimizing calculation of combinations into list - large data set.R
new file mode 100644
index 0000000..cd05cea
--- /dev/null
+++ b/SO_Answers/Optimizing calculation of combinations into list - large data set.R
@@ -0,0 +1,91 @@
+reprex::reprex({
+ #' Here is an answer that is much faster than the OP's solution on large test cases. It doesn't rely on `paste`, but rather we take advantage of properties of numbers and vectorized operations. We also use `comboGeneral` from the `RcppAlgos` package (I am the author) which is much faster than `combn` and `combn_prim` from the linked answer for generating combinations of a vector. First we show the efficiency gains of `comboGeneral` over the other functions:
+
+ ## library(gRbase)
+ library(RcppAlgos)
+ library(microbenchmark)
+ options(digits = 4)
+ options(width = 90)
+ options(scipen = 99)
+
+ microbenchmark(
+ gRBase = gRbase::combn_prim(300, 2),
+ utils = combn(300, 2),
+ RcppAlgos = comboGeneral(300, 2),
+ unit = "relative"
+ )
+
+ #' Now, we create a function to create some random reproducible data that will be passed to our test functions:
+
+ makeTestSet <- function(vectorSize, elementSize, mySeed = 42, withRep = FALSE) {
+ set.seed(mySeed)
+ sapply(1:vectorSize, function(x) {
+ paste(sample(10^6, s1 <- sample(2:elementSize, 1),
+ replace = withRep), collapse = " ")
+ })
+ }
+
+ makeTestSet(5, 3)
+
+ #' That looks good. Now, lets see if setting `fixed = TRUE` gets us any gains (as suggested above by @MichaelChirico):
+
+ bigVec <- makeTestSet(10, 100000)
+
+ microbenchmark(standard = strsplit(bigVec, " "),
+ withFixed = strsplit(bigVec, " ", fixed = TRUE),
+ times = 15, unit = "relative")
+
+ #' @MichaelChirico was spot on. Putting it all together we get:
+
+ combPairFast <- function(testVec) {
+ lapply(strsplit(testVec, " ", fixed = TRUE), function(x) {
+ combs <- RcppAlgos::comboGeneral(as.numeric(x), 2)
+ unique(combs[,1] * (10)^(as.integer(log10(combs[,2])) + 1L) + combs[,2])
+ })
+ }
+
+ ## test.vector defined above by OP
+ test.vector <- c(
+ "335261 344015 537633", "22404 132858",
+ "254654 355860 488288", "219943 373817",
+ "331839 404477"
+ )
+
+ combPairFast(test.vector)
+
+ ## OP original code
+ combPairOP <- function(testVec) {
+ lapply(strsplit(testVec, " "), function(x) unique(
+ apply(combn(x, 2), 2, function(y) paste0(y, collapse = "")))
+ )
+ }
+
+ #' As stated in the comments by the OP, the maximum number is less than a million (600000 to be exact), which means that after we multiply one of the numbers by at most 10^6 and add it to another 6 digit number (equivalent to simply concatenating two strings of numbers), we are guaranteed to be within the numerical precision of base R (i.e. `2^53 - 1`). This is good because arithmetic operations on numerical numbers is much more efficient than strings operations.
+ #'
+ #' All that is left is to benchmark:
+
+ test.vector <- makeTestSet(100, 50)
+
+ microbenchmark(
+ original = combPairOP(test.vector),
+ new_impl = combPairFast(test.vector),
+ times = 20,
+ unit = "relative"
+ )
+
+ #' And on larger vectors:
+
+ bigTest.vector <- makeTestSet(1000, 100, mySeed = 22, withRep = TRUE)
+
+ ## Duplicate values exist
+ any(sapply(strsplit(bigTest.vector, " ", fixed = TRUE), function(x) {
+ any(duplicated(x))
+ }))
+
+ system.time(t1 <- combPairFast(bigTest.vector))
+
+ system.time(t2 <- combPairOP(bigTest.vector))
+
+ ## results are the same
+ all.equal(t1, lapply(t2, as.numeric))
+}, advertise = FALSE, venue = "r", html_preview = FALSE, wd = ".")
diff --git a/SO_Answers/Permutations of data sets with duplicates in R.R b/SO_Answers/Permutations of data sets with duplicates in R.R
new file mode 100644
index 0000000..c7791ec
--- /dev/null
+++ b/SO_Answers/Permutations of data sets with duplicates in R.R
@@ -0,0 +1,68 @@
+reprex::reprex({
+ #' What you are looking for is permutations of [multisets](https://en.wikipedia.org/wiki/Multiset).
+
+ library(RcppAlgos)
+ multiPerm <- permuteGeneral(1:4, freqs = rep(2,4))
+
+ head(multiPerm)
+
+ v <- rep(1:4, each = 2)
+ v
+
+ ## Also, utilize S3 'table' method
+ head(permuteGeneral(table(v)))
+
+ #' Or using the package `arrangements`:
+
+ multiPerm2 <- arrangements::permutations(1:4, freq = rep(2, 4))
+
+ #' Sanity check:
+
+ identical(multiPerm, multiPerm2)
+
+ suppressWarnings(suppressMessages(library(combinat)))
+ library(gtools)
+
+ OPTestOne <- unlist(unique(permn(v, paste0, collapse = "")))
+ all.equal(sort(apply(multiPerm, 1, paste, collapse = "")),
+ sort(OPTestOne))
+
+ OPTestTwo <- unique(permutations(8, 8, v, set = FALSE))
+ all.equal(OPTestTwo, multiPerm)
+
+ #' Here are some benchmarks:
+
+ library(microbenchmark)
+ options(digits = 4)
+
+ microbenchmark(OP_One = unique(permn(v, paste0, collapse = "")),
+ Arnge = arrangements::permutations(1:4, freq = rep(2, 4)),
+ OP_Two = unique(permutations(8, 8, v, set = FALSE)),
+ times = 5, unit = "relative")
+
+ #' Finding permutations of multisets choose _m_ is no problem either.
+
+ microbenchmark(
+ algos = permuteGeneral(1:4, m = 11, freqs = rep(4, 4)),
+ arnge = arrangements::permutations(1:4, 11, freq = rep(4, 4))
+ )
+
+ #' The OP's guess at how many permutations (525,525) for this latter example is not correct. Finding this is a [little more involved](https://github.com/jwood000/RcppAlgos/blob/2c2a6bfd9ea56488a718addb4aa30df3d9a86ee1/src/CombinatoricsContainer.cpp#L622) than the one liner offered.
+
+ permuteCount(table(rep(1:4, each = 4)), m = 11)
+
+ arrangements::npermutations(1:4, 11, freq = rep(4, 4))
+
+ #' And just to show that these are all unique:
+
+ length(
+ unique(
+ apply(
+ permuteGeneral(1:4, m = 11, freqs = rep(4, 4)),
+ MARGIN = 1, paste, collapse = ""
+ )
+ )
+ )
+
+ #' For more information regarding problems like this in R, I wrote a [thorough overview](https://stackoverflow.com/a/47983855/4408538) to the question: [How to generate permutations or combinations of object in R?](https://stackoverflow.com/q/22569176/4408538) by @RandyLai (author of `arrangements`)
+}, advertise = FALSE, venue = "r", html_preview = FALSE, wd = ".")
\ No newline at end of file
diff --git a/SO_Answers/R Function for returning ALL factors.R b/SO_Answers/R Function for returning ALL factors.R
new file mode 100644
index 0000000..26e6da8
--- /dev/null
+++ b/SO_Answers/R Function for returning ALL factors.R
@@ -0,0 +1,184 @@
+reprex::reprex({
+ #' A lot has changed in the R language since this question was originally asked. In version `0.6-3` of the `numbers` package, the function `divisors` was included that is very useful for getting all of the factors of a number. It will meet the needs of most users, however if you are looking for raw speed or you are working with larger numbers, you will need an alternative method. I have authored two packages (partially inspired by this question, I might add) that contain highly optimized functions aimed at problems just like this. The first one is `RcppAlgos` and the other is `RcppBigIntAlgos` (formerly called `bigIntegerAlgos`).
+ #'
+ #' `RcppAlgos`
+ #' =========
+ #' `RcppAlgos` contains two functions for obtaining divisors of numbers less than `2^53 - 1` : `divisorsRcpp` (a vectorized function for quickly obtaining the complete factorization of many numbers) & `divisorsSieve` (quickly generates the complete factorization over a range). First up, we factor many random numbers using `divisorsRcpp`:
+
+ library(gmp) ## for all_divisors by @GeorgeDontas
+ library(RcppAlgos)
+ library(numbers)
+ options(scipen = 999)
+ set.seed(42)
+ testSamp <- sample(10^10, 10)
+
+ all_divisors <- function(x) {
+ sort_listz <- function(z) z <- z[order(as.numeric(z))]
+ mult_listz <- function(x,y) do.call('c', lapply(y, function(i) i*x))
+
+ if (abs(x)<=1) return(x)
+ else {
+ factorsz <- as.bigz(factorize(as.bigz(x)))
+ factorsz <- sort_listz(factorsz)
+ prime_factorsz <- unique(factorsz)
+ prime_ekt <- vapply(prime_factorsz, function(i) sum(factorsz==i), integer(1), USE.NAMES=FALSE)
+ spz <- vector()
+ all <-1
+ n <- length(prime_factorsz)
+ for (i in 1:n) {
+ pr <- prime_factorsz[i]
+ pe <- prime_ekt[i]
+ all <- all*(pe+1)
+
+ prz <- as.bigz(pr)
+ pse <- vector(mode="raw",length=pe+1)
+ pse <- c( as.bigz(1), prz)
+
+ if (pe>1) {
+ for (k in 2:pe) {
+ prz <- prz*pr
+ pse[k+1] <- prz
+ }
+ }
+
+ spz <- if (i>1) mult_listz (spz, pse) else pse;
+ }
+ spz <- sort_listz (spz)
+ return (spz)
+ }
+ }
+
+ ## vectorized so you can pass the entire vector as an argument
+ testRcpp <- divisorsRcpp(testSamp)
+ testDontas <- lapply(testSamp, all_divisors)
+
+ identical(lapply(testDontas, as.numeric), testRcpp)
+
+ #' And now, factor many numbers over a range using `divisorsSieve`:
+
+ identical(lapply(testDontas, as.numeric), testRcpp)
+
+ system.time(testSieve <- divisorsSieve(10^13, 10^13 + 10^5))
+
+ system.time(testDontasSieve <- lapply((10^13):(10^13 + 10^5), all_divisors))
+
+ identical(lapply(testDontasSieve, asNumeric), testSieve)
+
+ #' Both `divisorsRcpp` and `divisorsSieve` are nice functions that are flexible and efficient, however they are limited to `2^53 - 1`.
+ #'
+ #' `RcppBigIntAlgos`
+ #' =============
+ #' The `RcppBigIntAlgos` package (formerly called `bigIntegerAlgos` prior to version 0.2.0) links directly to the [C library gmp](https://gmplib.org/) and features `divisorsBig` which is designed for very large numbers.
+
+ library(RcppBigIntAlgos)
+ ## testSamp is defined above... N.B. divisorsBig is not quite as
+ ## efficient as divisorsRcpp. This is so because divisorsRcpp
+ ## can take advantage of more efficient data types.
+ testBig <- divisorsBig(testSamp)
+
+ identical(testDontas, testBig)
+
+ #' Functions for reproducing results:
+
+ FUN <- function(x) {
+ x <- as.integer(x)
+ div <- seq_len(abs(x))
+ factors <- div[x %% div == 0L]
+ factors <- list(neg = -factors, pos = factors)
+ return(factors)
+ }
+
+ get_all_factors <- function(n) {
+ prime_factor_tables <- lapply(
+ setNames(n, n),
+ function(i) {
+ if(i == 1) return(data.frame(x = 1L, freq = 1L))
+ plyr::count(as.integer(gmp::factorize(i)))
+ }
+ )
+ lapply(
+ prime_factor_tables,
+ function(pft) {
+ powers <- plyr::alply(pft, 1, function(row) row$x ^ seq.int(0L, row$freq))
+ power_grid <- do.call(expand.grid, powers)
+ sort(unique(apply(power_grid, 1, prod)))
+ }
+ )
+ }
+
+ #' And here are the benchmark as defined in my original post (N.B. `MyFactors` is replaced by `divisorsRcpp` and `divisorsBig`).
+
+ library(rbenchmark)
+ set.seed(199)
+ samp <- sample(10^9, 10^5)
+ benchmark(RcppAlgos=divisorsRcpp(samp),
+ RcppBigIntAlgos=divisorsBig(samp),
+ DontasDivs=lapply(samp, all_divisors),
+ replications=10,
+ columns = c("test", "replications", "elapsed", "relative"),
+ order = "relative")
+
+ set.seed(97)
+ samp <- sample(10^6, 10^4)
+ benchmark(
+ RcppAlgos = divisorsRcpp(samp),
+ RcppBigIntAlgos = divisorsBig(samp),
+ numbers = lapply(samp, divisors), ## From the numbers package
+ DontasDivs = lapply(samp, all_divisors),
+ CottonDivs = lapply(samp, get_all_factors),
+ ChaseDivs = lapply(samp, FUN),
+ replications = 5,
+ columns = c("test", "replications", "elapsed", "relative"),
+ order = "relative"
+ )
+
+ #' The next benchmarks demonstrate the true power of the underlying algorithm in the `divisorsBig` function. The number being factored is a power of `10`, so the prime factoring step can almost be completely ignored (e.g. `system.time(factorize(pow.bigz(10,30)))` registers `0` on my machine). Thus, the difference in timing is due solely to how quickly the prime factors can be combined to produce all factors.
+
+ library(microbenchmark)
+ powTen <- pow.bigz(10, 30)
+
+ microbenchmark(
+ algos = divisorsBig(powTen),
+ Dontas = all_divisors(powTen),
+ unit = "relative"
+ )
+
+ ## Negative numbers show an even greater increase in efficiency
+ negPowTen <- powTen * -1
+
+ microbenchmark(
+ algos = divisorsBig(negPowTen),
+ Dontas = all_divisors(negPowTen),
+ unit = "relative"
+ )
+
+ #' Very Large Numbers
+ #' ----------
+ #' With `divisorsBig`, obtaining the complete factorization with very large inputs is no problem. The algorithm dynamically adjusts based off of the input and applies different algorithms in different situations. We can also take advantage of multithreading if [Lenstra's Elliptic Curve method](https://en.wikipedia.org/wiki/Lenstra_elliptic-curve_factorization) or the [Quadratic Sieve](https://en.wikipedia.org/wiki/Quadratic_sieve) is utilized.
+ #'
+ #' Here are some examples using `n5` and `n9` defined in [this answer](https://stackoverflow.com/a/36537823/4408538).
+
+ n5 <- as.bigz("94968915845307373740134800567566911")
+ system.time(print(divisorsBig(n5)))
+
+ n9 <- prod(nextprime(urand.bigz(2, 82, 42)))
+ system.time(print(divisorsBig(n9, nThreads = 4)))
+
+ #' Here is an example provided by @Dontas with one large prime and one smaller prime:
+
+ x <- pow.bigz(2, 256) + 1
+ divisorsBig(x, showStats = TRUE, nThreads = 8)
+
+ #' Compare this to finding the prime factorization using `gmp::factorize`:
+
+ system.time(factorize(x))
+
+ #' Lastly, here is an example with a large semiprime (N.B. since we know it's a semiprime, we skip the extended Pollard's rho algorithm as well as Lentra's elliptic curve method).
+
+ ## https://members.loria.fr/PZimmermann/records/rsa.html
+ rsa79 <- as.bigz("7293469445285646172092483905177589838606665884410340391954917800303813280275279")
+ divisorsBig(
+ rsa79, nThreads = 8, showStats = TRUE,
+ skipPolRho = TRUE, skipECM = TRUE
+ )
+}, advertise = FALSE, venue = "r", html_preview = FALSE, wd = ".")
diff --git a/SO_Answers/R find all possible unique combinations.R b/SO_Answers/R find all possible unique combinations.R
new file mode 100644
index 0000000..ae797a4
--- /dev/null
+++ b/SO_Answers/R find all possible unique combinations.R
@@ -0,0 +1,18 @@
+reprex::reprex({
+ #' There are a few packages specifically built for this. The basic premise is that we need combinations with repetition of length `m` where `m` could be larger than the input vector. We start with the classic `gtools`:
+
+ library(gtools)
+ combinations(2, 3, letters[1:2], repeats.allowed = TRUE)
+
+ #' And then there is `arrangements` which is a replacement for `iterpc` (the package linked by @Gregor in the comments above):
+
+ ## library(arrangements)
+ arrangements::combinations(letters[1:2], 3, replace = TRUE)
+
+ #' And finally there is `RcppAlgos`, which I authored:
+
+ library(RcppAlgos)
+ comboGeneral(letters[1:2], 3, TRUE)
+
+ #' `combn` is an awesome function that ships as one of the base packages with `R`, however one of its shortcomings is that it doesn't allow repetition (which is what is required here). I wrote a comprehensive overview for problems exactly like this one that can be found here: [A Walk Through a Slice of Combinatorics in R](https://stackoverflow.com/a/47983855/4408538).
+}, advertise = FALSE, venue = "r", html_preview = FALSE, wd = ".")
\ No newline at end of file
diff --git a/SO_Answers/Unordered combinations of all lengths.R b/SO_Answers/Unordered combinations of all lengths.R
new file mode 100644
index 0000000..7038358
--- /dev/null
+++ b/SO_Answers/Unordered combinations of all lengths.R
@@ -0,0 +1,69 @@
+reprex::reprex({
+ #' I was re-directed here from [List All Combinations With combn](https://stackoverflow.com/q/49570793/4408538) as this was one of the dupe targets. This is an old question and the answer provided by @RichScriven is very nice, but I wanted to give the community a few more options that are arguably more natural and more efficient (the last two).
+ #'
+ #' We first note that the output is very similar to the [Power Set](https://en.wikipedia.org/wiki/Power_set). Calling `powerSet` from the `rje` package, we see that indeed our output matches every element from the power set except the first element which is equivalent to the [Empty Set](https://en.wikipedia.org/wiki/Empty_set):
+
+ x <- c("red", "blue", "black")
+ rje::powerSet(x)
+
+ #' If you don't want the first element, you can easily add a `[-1]` to the end of your function call like so : `rje::powerSet(x)[-1]`.
+ #'
+ #' The next two solutions are from the newer packages `arrangements` and `RcppAlgos` (I am the author), that will offer the user great gains in efficiency. Both of these packages are capable of generating combinations of [Multisets](https://en.wikipedia.org/wiki/Multiset).
+ #'
+ #' > **Why is this important?**
+ #'
+ #' It can be shown that there is a [one-to-one mapping](https://en.wikipedia.org/wiki/Injective_function) from the power set of `A` to all combinations of the multiset `c(rep(emptyElement, length(A)), A)` choose `length(A)`, where `emptyElement` is a representation of the empty set (like zero or a blank). With this in mind, observe:
+
+ ## There is also a function called combinations in the
+ ## rje package, so we fully declare the function with scope operator
+ ##
+ ## library(arrangements)
+ arrangements::combinations(x = c("",x), k = 3, freq = c(2, rep(1, 3)))
+
+ library(RcppAlgos)
+ comboGeneral(c("",x), 3, freqs = c(2, rep(1, 3)))
+
+ #' If you don't like dealing with blank elements and/or matrices, you can also return a list making use of `lapply`.
+
+ lapply(seq_along(x), comboGeneral, v = x)
+
+ lapply(seq_along(x), function(y) arrangements::combinations(x, y))
+
+ #' Now we show that the last two methods are much more efficient (N.B. I removed `do.call(c, ` and `simplify = FALSE` from the answer provided by @RichSciven in order to compare generation of similar outputs. I also included `rje::powerSet` for good measure):
+
+ library(microbenchmark)
+ options(digits = 4)
+ options(width = 90)
+
+ set.seed(8128)
+ bigX <- sort(sample(10^6, 20)) ## With this as an input, we will get 2^20 - 1 results.. i.e. 1,048,575
+
+ microbenchmark(
+ powSetRje = rje::powerSet(bigX),
+ powSetRich = lapply(seq_along(bigX), combn, x = bigX),
+ powSetArrange = lapply(seq_along(bigX), \(y) {
+ arrangements::combinations(x = bigX, k = y)
+ }),
+ powSetAlgos = lapply(seq_along(bigX), comboGeneral, v = bigX),
+ unit = "relative"
+ )
+
+ #' Even further, `arrangements` comes equipped with an argument called `layout` which lets the user choose a particular format for their output. One of those is `layout = "l"` for list. It is similar to setting `simplify = FALSE` in `combn` and allows us to obtain output like that of `powerSet`. Observe:
+
+ do.call(c, lapply(seq_along(x), function(y) {
+ arrangements::combinations(x, y, layout = "l")
+ }))
+
+ #' And the benchmarks:
+
+ microbenchmark(
+ powSetRje = rje::powerSet(bigX)[-1],
+ powSetRich = do.call(c, lapply(seq_along(bigX), combn,
+ x = bigX, simplify = FALSE)),
+ powSetArrange = do.call(c, lapply(seq_along(bigX), \(y) {
+ arrangements::combinations(bigX, y, layout = "l")}
+ )),
+ unit = "relative",
+ times = 15
+ )
+}, advertise = FALSE, venue = "r", html_preview = FALSE, wd = ".")
diff --git a/SO_Answers/permutation of 2 to 2 of a sample with duplicate elements.R b/SO_Answers/permutation of 2 to 2 of a sample with duplicate elements.R
new file mode 100644
index 0000000..de31c80
--- /dev/null
+++ b/SO_Answers/permutation of 2 to 2 of a sample with duplicate elements.R
@@ -0,0 +1,15 @@
+reprex::reprex({
+ #' This is easily achieved with one of the many packages for generating permutations with repetition.
+
+ library(gtools)
+ gtools::permutations(3, 2, c(1, 2, 2), set = FALSE, repeats.allowed = TRUE)
+
+ library(arrangements)
+ ## output same as above
+ arrangements::permutations(x = c(1,2,2), k = 2, replace = TRUE)
+
+
+ library(RcppAlgos) ### I am the author
+ ## output same as above
+ RcppAlgos::permuteGeneral(c(1,2,2), 2, TRUE)
+})
\ No newline at end of file
diff --git a/SO_Answers/rperm.R b/SO_Answers/rperm.R
new file mode 100644
index 0000000..2346dcf
--- /dev/null
+++ b/SO_Answers/rperm.R
@@ -0,0 +1,51 @@
+rperm <- function(m, size=2) { # Obtain m unique permutations of 1:size
+ max.failures <- 10
+
+ # Function to index into the upper-level cache.
+ prefix <- function(p, k) { # p is a permutation, k is the prefix size
+ sum((p[1:k] - 1) * (size ^ ((1:k)-1))) + 1
+ } # Returns a value from 1 through size^k
+
+ # Function to obtain a new permutation.
+ newperm <- function() {
+ # References cache, k.head, and failures in parent context.
+ # Modifies cache and failures.
+
+ count <- 0 # Protects against infinite loops
+ repeat {
+ # Generate a permutation and check against previous ones.
+ p <- sample(1:size)
+ k <- prefix(p, k.head)
+ ip <- cache[[k]]
+ hash.p <- paste(tail(p,-k.head), collapse="")
+ if (is.null(ip[[hash.p]])) break
+
+ # Prepare to try again.
+ n.failures <<- n.failures + 1
+ count <- count+1
+ if (count > max.failures) {
+ p <- NA # NA indicates a new permutation wasn't found
+ hash.p <- ""
+ break
+ }
+ }
+ if (count <= max.failures) {
+ ip[[hash.p]] <- TRUE # Update the list of permutations found
+ cache[[k]] <<- ip
+ }
+ p # Return this (new) permutation
+ }
+
+ # Initialize the cache.
+ k.head <- min(size-1, max(1, floor(log(m / log(m)) / log(size))))
+ cache <- as.list(1:(size^k.head))
+ for (i in 1:(size^k.head)) cache[[i]] <- list()
+
+ # Count failures (for benchmarking and error checking).
+ n.failures <- 0
+
+ # Obtain (up to) m unique permutations.
+ s <- replicate(m, newperm())
+ s[is.na(s)] <- NULL
+ list(failures=n.failures, sample=matrix(unlist(s), ncol=size))
+} # Returns an m by size matrix; each row is a permutation of 1:size.
\ No newline at end of file
diff --git a/codecov.yml b/codecov.yml
new file mode 100644
index 0000000..04c5585
--- /dev/null
+++ b/codecov.yml
@@ -0,0 +1,14 @@
+comment: false
+
+coverage:
+ status:
+ project:
+ default:
+ target: auto
+ threshold: 1%
+ informational: true
+ patch:
+ default:
+ target: auto
+ threshold: 1%
+ informational: true
diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd
index b0eaf0b..9813386 100644
--- a/inst/NEWS.Rd
+++ b/inst/NEWS.Rd
@@ -1,7 +1,22 @@
\name{NEWS}
\title{News for Package \pkg{RcppAlgos}}
-\section{Changes in RcppAlgos version 2.8.3 (Release date: 2023-12-11)}{
+\section{Changes in RcppAlgos version 2.8.4 (Release date: 2024-06-02)}{
+ \itemize{
+ \item New Features:
+ \itemize{
+ \item Can now pass integer convertible results to the sample functions for the \code{n} parameter.
+ \item Added \code{...} to allow for passing additional arguments to \code{FUN}. For example: \code{comboGeneral(letters, 3, FUN = paste, collapse = ", ")}.
+ }
+ \item Bug Fixes:
+ \itemize{
+ \item Fixed bug with iterator when multithreading and exhausting the iterator.
+ \item Enforced values being converted to a primitive to be of length 1.
+ }
+ }
+}
+
+\section{Changes in RcppAlgos version 2.8.3 (Release date: 2023-12-10)}{
\itemize{
\item New Features:
\itemize{
diff --git a/inst/include/ClassUtils/ComboClass.h b/inst/include/ClassUtils/ComboClass.h
index 2afd0b4..1034d3b 100644
--- a/inst/include/ClassUtils/ComboClass.h
+++ b/inst/include/ClassUtils/ComboClass.h
@@ -12,7 +12,7 @@
class Combo {
private:
- SEXP MatForward(int nRows);
+ SEXP MatForward(int nRows, int numIncrement);
SEXP MatReverse(int nRows);
protected:
diff --git a/scripts/GeneralCombinatorics.R b/scripts/GeneralCombinatorics.R
index 207113a..c0a898a 100644
--- a/scripts/GeneralCombinatorics.R
+++ b/scripts/GeneralCombinatorics.R
@@ -327,4 +327,3 @@ reprex::reprex({
#'
}, advertise = FALSE, venue = "r", html_preview = FALSE, wd = ".")
-
diff --git a/src/ComboClass.cpp b/src/ComboClass.cpp
index 729f6ba..136d36e 100644
--- a/src/ComboClass.cpp
+++ b/src/ComboClass.cpp
@@ -78,7 +78,7 @@ SEXP Combo::BasicVecReturn() {
return res;
}
-SEXP Combo::MatForward(int nRows) {
+SEXP Combo::MatForward(int nRows, int numIncrement) {
int nThreads = 1;
bool LocalPar = Parallel;
@@ -93,6 +93,7 @@ SEXP Combo::MatForward(int nRows) {
);
zUpdateIndex(vNum, vInt, z, sexpVec, res, m, nRows);
+ increment(IsGmp, mpzIndex, dblIndex, numIncrement);
if (!IsComb) TopOffPerm(z, myReps, n, m, IsRep, IsMult);
return res;
}
@@ -184,7 +185,7 @@ SEXP Combo::nextNumCombs(SEXP RNum) {
int num;
CppConvert::convertPrimitive(RNum, num, VecType::Integer,
- "The number of results");
+ "The number of results");
if (CheckIndLT(IsGmp, mpzIndex, dblIndex,
computedRowsMpz, computedRows)) {
@@ -206,8 +207,7 @@ SEXP Combo::nextNumCombs(SEXP RNum) {
nextIter(freqs, z, n1, m1);
}
- increment(IsGmp, mpzIndex, dblIndex, numIncrement);
- return MatForward(nRows);
+ return MatForward(nRows, numIncrement);
} else if (CheckEqInd(IsGmp, mpzIndex, dblIndex,
computedRowsMpz, computedRows)) {
return ToSeeLast();
@@ -220,7 +220,7 @@ SEXP Combo::prevNumCombs(SEXP RNum) {
int num;
CppConvert::convertPrimitive(RNum, num, VecType::Integer,
- "The number of results");
+ "The number of results");
if (CheckGrTSi(IsGmp, mpzIndex, dblIndex, 1)) {
int nRows = 0;
@@ -286,7 +286,7 @@ SEXP Combo::nextGather() {
dblIndex = computedRows + 1;
}
- return MatForward(nRows);
+ return MatForward(nRows, 0);
} else {
return R_NilValue;
}
diff --git a/src/GmpConvert.cpp b/src/GmpConvert.cpp
index a8b086f..5139bfc 100644
--- a/src/GmpConvert.cpp
+++ b/src/GmpConvert.cpp
@@ -154,9 +154,16 @@ namespace CppConvert {
switch (TYPEOF(input)) {
case RAWSXP: {
- // deserialise the vector. first int is the size.
+ // de-serialize the vector. first int is the size.
const char* raw = (char*) RAW(input);
const int* r = (int*) (&raw[intSize]);
+ const int len = ((int*) raw)[0];
+
+ if (len > 1) {
+ myError = nameOfObject + " must be of length 1";
+ foundError = true;
+ break;
+ }
if (r[0] > 0) {
mpz_import(result.get_mpz_t(), r[0], 1,
@@ -179,6 +186,12 @@ namespace CppConvert {
break;
} case REALSXP: {
+ if (Rf_length(input) > 1) {
+ myError = nameOfObject + " must be of length 1";
+ foundError = true;
+ break;
+ }
+
const double dblInput = Rf_asReal(input);
if (ISNAN(dblInput)) {
@@ -222,6 +235,12 @@ namespace CppConvert {
}
case INTSXP:
case LGLSXP: {
+ if (Rf_length(input) > 1) {
+ myError = nameOfObject + " must be of length 1";
+ foundError = true;
+ break;
+ }
+
const int intInput = Rf_asInteger(input);
const int dblInput = Rf_asReal(input);
@@ -240,6 +259,12 @@ namespace CppConvert {
result = intInput;
break;
} case STRSXP: {
+ if (Rf_length(input) > 1) {
+ myError = nameOfObject + " must be of length 1";
+ foundError = true;
+ break;
+ }
+
if (STRING_ELT(input, 0) == NA_STRING) {
myError = nameOfObject + " cannot be NA or NaN";
foundError = true;
diff --git a/src/SetUpUtils.cpp b/src/SetUpUtils.cpp
index f7a3c9c..465302b 100644
--- a/src/SetUpUtils.cpp
+++ b/src/SetUpUtils.cpp
@@ -133,8 +133,6 @@ void SetFreqsAndM(std::vector &Reps,
if (Rf_isNull(Rm)) {
m = freqs.empty() ? n : freqs.size();
- } else if (Rf_length(Rm) > 1) {
- cpp11::stop("length of m must be 1");
} else {
CppConvert::convertPrimitive(Rm, m, VecType::Integer, "m");
}
@@ -553,13 +551,8 @@ void SetRandomSample(SEXP RindexVec, SEXP RNumSamp, int &sampSize,
cpp11::stop("n and sampleVec cannot both be NULL");
}
- if (Rf_length(RNumSamp) > 1) {
- cpp11::stop("length of n must be 1. For specific "
- "combinations, use sampleVec.");
- }
-
CppConvert::convertPrimitive(RNumSamp, sampSize,
- VecType::Integer, "n");
+ VecType::Integer, "n", false);
if (!IsGmp) {
if (sampSize > computedRows) {
@@ -568,7 +561,7 @@ void SetRandomSample(SEXP RindexVec, SEXP RNumSamp, int &sampSize,
cpp11::sexp sample = Rf_lang3(baseSample,
Rf_ScalarReal(computedRows),
- RNumSamp);
+ Rf_ScalarInteger(sampSize));
cpp11::sexp val = Rf_eval(sample, rho);
mySample.resize(sampSize);
@@ -585,7 +578,6 @@ void SetRandomSample(SEXP RindexVec, SEXP RNumSamp, int &sampSize,
mySample[j] = dblSamp[j];
}
}
-
}
} else {
if (IsGmp) {
@@ -600,8 +592,8 @@ void SetRandomSample(SEXP RindexVec, SEXP RNumSamp, int &sampSize,
}
} else {
CppConvert::convertVector(RindexVec, mySample,
- VecType::Numeric,
- "sampleVec", false);
+ VecType::Numeric,
+ "sampleVec", false);
sampSize = mySample.size();
const double myMax = *std::max_element(mySample.cbegin(),
mySample.cend());
diff --git a/src/StdConvert.cpp b/src/StdConvert.cpp
index f686b1d..ed845ef 100644
--- a/src/StdConvert.cpp
+++ b/src/StdConvert.cpp
@@ -48,6 +48,11 @@ namespace CppConvert {
case REALSXP:
case LGLSXP:
case INTSXP: {
+ if (Rf_length(input) > 1) {
+ cpp11::stop(" %s must be of length 1",
+ nameOfObject.c_str());
+ }
+
const double dblInp = Rf_asReal(input);
const double posDblInp = std::abs(dblInp);
@@ -94,8 +99,9 @@ namespace CppConvert {
}
mpz_class temp;
- CppConvert::convertMpzClass(input, temp, nameOfObject,
- negPoss);
+ CppConvert::convertMpzClass(
+ input, temp, nameOfObject, negPoss
+ );
const double dblTemp = temp.get_d();
const double posDblTemp = std::abs(dblTemp);
@@ -131,7 +137,7 @@ namespace CppConvert {
nameOfObject.c_str());
}
- result = dblTemp;
+ result = static_cast(dblTemp);
break;
} default: {
cpp11::stop(
diff --git a/tests/testthat/testErrors.R b/tests/testthat/testErrors.R
index 6950a58..c744d1f 100644
--- a/tests/testthat/testErrors.R
+++ b/tests/testthat/testErrors.R
@@ -133,14 +133,14 @@ test_that("partitionsIter related functions produces appropriate error messages"
test_that(paste("partitions/compositionsDesign functions",
"produces appropriate error messages"), {
- expect_error(compositionsDesign(0:40, 8),
+ expect_error(RcppAlgos:::compositionsDesign(0:40, 8),
"Currently, there is no composition algorithm")
- expect_error(compositionsDesign(40, 8, freqs = rep(1:5, 8)),
+ expect_error(RcppAlgos:::compositionsDesign(40, 8, freqs = rep(1:5, 8)),
"Currently, there is no composition algorithm")
- expect_error(partitionsDesign(0:17 + rnorm(18), 10,
+ expect_error(RcppAlgos:::partitionsDesign(0:17 + rnorm(18), 10,
repetition = TRUE, target = 25),
"No design available for this case!")
- expect_error(partitionsCount(0:17 + rnorm(18), 10,
+ expect_error(RcppAlgos:::partitionsCount(0:17 + rnorm(18), 10,
repetition = TRUE, target = 25),
"The count is unknown for this case.")
})
@@ -251,7 +251,7 @@ test_that("permuteGeneral produces appropriate error messages", {
limitConstraints = 100,
upper = 10^10),
"number of rows cannot exceed")
- expect_error(permuteGeneral(5, 1:5), "length of m must be 1")
+ expect_error(permuteGeneral(5, 1:5), "m must be of length 1")
expect_error(permuteGeneral(5, -5), "m must be a positive whole number")
expect_error(permuteCount(5, 5, "TRUE"),
"Only logical values are supported for repetition")
@@ -353,9 +353,7 @@ test_that("comboSample produces appropriate error messages", {
"exceeds the maximum number of possible results")
expect_error(comboSample(5,freqs = rep(1,6)),
"m must be less than or equal to the length of v")
- expect_error(comboSample(5,3, n = "5"),
- "n must be of type numeric or integer")
- expect_error(comboSample(5,3, n = 1:5), "length of n must be 1")
+ expect_error(comboSample(5,3, n = 1:5), "n must be of length 1")
expect_error(comboSample(100000, 10, sampleVec = -1L),
"sampleVec must be a positive number")
expect_error(comboSample(100000, 10, sampleVec = NaN),
@@ -430,16 +428,14 @@ test_that("{combo|permute|partitions|compositions}Rank produces appropriate erro
test_that("permuteSample produces appropriate error messages", {
expect_error(permuteSample(5, 6, n = 3),
"m must be less than or equal to the length of v")
- expect_error(permuteSample(5, 1:5), "length of m must be 1")
+ expect_error(permuteSample(5, 1:5), "m must be of length 1")
expect_error(permuteSample(5, -4), "m must be a positive whole number")
expect_error(permuteSample(5, 3), "n and sampleVec cannot both be NULL")
expect_error(permuteSample(5,3,freqs = c(1,2,3,-2,1)),
"in freqs must be a positive")
expect_error(permuteSample(5,3, n = 100),
"n exceeds the maximum number of possible results")
- expect_error(permuteSample(5,3, n = "5"),
- "n must be of type numeric or integer")
- expect_error(permuteSample(5,3, n = 1:5), "length of n must be 1")
+ expect_error(permuteSample(5,3, n = 1:5), "n must be of length 1")
expect_error(permuteSample(5,3, permuteSample(5,3, sampleVec = 1:200)),
"exceeds the maximum number of possible results")
expect_error(permuteSample(5,freqs = rep(1,6)),