diff --git a/R/popmusic.R b/R/popmusic.R
new file mode 100644
index 0000000..cfe0541
--- /dev/null
+++ b/R/popmusic.R
@@ -0,0 +1,99 @@
+
+
+#' Spotify data for 3 artists
+#'
+#' Audio features for all recordings by Taylor Swift, Radiohead, and The
+#' National, as available in the Spotify API.
+#'
+#'
+#' @format Audio features for each track:
+#' \describe{
+#' \item{artist}{Who is performing this music? This data contains tracks by
+#' Taylor Swift, Radiohead, and The National.}
+#' \item{acousticness}{A confidence measure from 0.0 to 1.0 of whether the track
+#' is acoustic. 1.0 represents high confidence the track is acoustic.}
+#' \item{danceability}{Danceability describes how suitable a track is for
+#' dancing based on a combination of musical elements including tempo, rhythm
+#' stability, beat strength, and overall regularity. A value of 0.0 is least
+#' danceable and 1.0 is most danceable.}
+#' \item{duration_ms}{The duration of the track in milliseconds.}
+#' \item{energy}{Energy is a measure from 0.0 to 1.0 and represents a
+#' perceptual measure of intensity and activity. Typically, energetic tracks
+#' feel fast, loud, and noisy. For example, death metal has high energy,
+#' while a Bach prelude scores low on the scale. Perceptual features
+#' contributing to this attribute include dynamic range, perceived loudness,
+#' timbre, onset rate, and general entropy.}
+#' \item{instrumentalness}{Predicts whether a track contains no vocals. "Ooh"
+#' and "aah" sounds are treated as instrumental in this context. Rap or
+#' spoken word tracks are clearly "vocal". The closer the instrumentalness
+#' value is to 1.0, the greater likelihood the track contains no vocal
+#' content. Values above 0.5 are intended to represent instrumental tracks,
+#' but confidence is higher as the value approaches 1.0.}
+#' \item{key}{The key the track is in. Integers map to pitches using standard
+#' Pitch Class notation. E.g. 0 = C, 1 = C♯/D♭, 2 = D, and so on. If no key
+#' was detected, the value is -1.}
+#' \item{liveness}{Detects the presence of an audience in the recording. Higher
+#' liveness values represent an increased probability that the track was
+#' performed live. A value above 0.8 provides strong likelihood that the
+#' track is live.}
+#' \item{loudness}{The overall loudness of a track in decibels (dB). Loudness
+#' values are averaged across the entire track and are useful for comparing
+#' relative loudness of tracks. Loudness is the quality of a sound that is
+#' the primary psychological correlate of physical strength (amplitude).
+#' Values typically range between -60 and 0 dB.}
+#' \item{mode}{Mode indicates the modality (major or minor) of a track, the
+#' type of scale from which its melodic content is derived. Major is
+#' represented by 1 and minor is 0.}
+#' \item{speechiness}{Speechiness detects the presence of spoken words in a
+#' track. The more exclusively speech-like the recording (e.g. talk show,
+#' audio book, poetry), the closer to 1.0 the attribute value. Values above
+#' 0.66 describe tracks that are probably made entirely of spoken words.
+#' Values between 0.33 and 0.66 describe tracks that may contain both music
+#' and speech, either in sections or layered, including such cases as rap
+#' music. Values below 0.33 most likely represent music and other
+#' non-speech-like tracks.}
+#' \item{tempo}{The overall estimated tempo of a track in beats per minute
+#' (BPM). In musical terminology, tempo is the speed or pace of a given piece
+#' and derives directly from the average beat duration.}
+#' \item{time_signature}{An estimated time signature. The time signature (meter)
+#' is a notational convention to specify how many beats are in each bar (or
+#' measure). The time signature ranges from 3 to 7, indicating time signatures
+#' from "3/4" to "7/4".}
+#' \item{valence}{A measure from 0.0 to 1.0 describing the musical positiveness
+#' conveyed by a track. Tracks with high valence sound more positive (e.g.
+#' happy, cheerful, euphoric), while tracks with low valence sound more
+#' negative (e.g. sad, depressed, angry).}
+#' \item{explicit}{Does the track contain explicit lyrics that would prevent
+#' typical radio play (not safe for work)?}
+#' }
+#'
+#' @name popmusic
+#' @source Rohan Alexander (2023). _Telling Stories with Data: With_
+#' _Applications in `R`_. CRC Press, Toronto.
+#' <https://tellingstorieswithdata.com/>.
+#'
+#' The example data are available with the textbook, originally from the
+#' Spotify API and described in Chapter 7.
+#'
+#' Documentation is from [Spotify API](https://developer.spotify.com/documentation/web-api/reference/get-audio-features).
+#' @keywords datasets
+NULL
+
+#' @details
+#' `popmusic_train` is the training set.
+#' @rdname popmusic
+#' @examples
+#' popmusic_train
+#'
+"popmusic_train"
+
+
+#' @details
+#' `popmusic_test` is the test set.
+#' It contains a held-out set of `r nrow(popmusic_test)` songs.
+#'
+#' @rdname popmusic
+#' @examples
+#' popmusic_test
+#'
+"popmusic_test"
diff --git a/data-raw/mcycle.R b/data-raw/mcycle.R
index 2e7a446..bbe833f 100644
--- a/data-raw/mcycle.R
+++ b/data-raw/mcycle.R
@@ -1,7 +1,7 @@
 ## code to prepare `mcycle` dataset goes here
 m <- MASS::mcycle
 n <- nrow(m)
-sets <- c(0, rep(1:3, times = n - 2), 0)
+sets <- c(0, rep(1:3, length.out = n - 2), 0)
 mcycle_test <- m[sets == 2, ]
 mcycle_train <- m[sets != 2, ]
 
diff --git a/data-raw/popmusic.R b/data-raw/popmusic.R
new file mode 100644
index 0000000..5576ab7
--- /dev/null
+++ b/data-raw/popmusic.R
@@ -0,0 +1,24 @@
+## code to prepare `popmusic` dataset goes here
+library(tidyverse)
+## all are used in Rohan Alexander's "Telling Stories with Data"
+taytay <- read_rds("https://raw.githubusercontent.com/RohanAlexander/telling_stories/main/inputs/data/taylor_swift.rds")
+radiohead <- read_rds("https://raw.githubusercontent.com/RohanAlexander/telling_stories/main/inputs/data/radiohead.rds")
+the_national <- read_rds("https://raw.githubusercontent.com/RohanAlexander/telling_stories/main/inputs/data/the_national.rds")
+
+popmusic <- bind_rows(taytay, radiohead, the_national) |>
+  as_tibble() |>
+  select(artist = artist_name, danceability:tempo,
+         time_signature, duration_ms, explicit) |>
+  mutate(artist = as.factor(artist))
+
+## stratified 75/25 split: sample row indices within each artist
+idx <- 1:nrow(popmusic)
+splitter <- tibble(idx, gr = popmusic$artist) |>
+  slice_sample(prop = .75, by = gr)
+
+popmusic_train <- popmusic[splitter$idx, ]
+popmusic_test <- popmusic[setdiff(idx, splitter$idx), ]
+
+
+usethis::use_data(popmusic_train, overwrite = TRUE)
+usethis::use_data(popmusic_test, overwrite = TRUE)
diff --git a/data/mcycle_test.rda b/data/mcycle_test.rda
index 6bde113..0c71c79 100644
Binary files a/data/mcycle_test.rda and b/data/mcycle_test.rda differ
diff --git a/data/mcycle_train.rda b/data/mcycle_train.rda
index b876804..733b9dc 100644
Binary files a/data/mcycle_train.rda and b/data/mcycle_train.rda differ
diff --git a/data/popmusic_test.rda b/data/popmusic_test.rda
new file mode 100644
index 0000000..c75e8bd
Binary files /dev/null and b/data/popmusic_test.rda differ
diff --git a/data/popmusic_train.rda b/data/popmusic_train.rda
new file mode 100644
index 0000000..d1640e0
Binary files /dev/null and b/data/popmusic_train.rda differ
diff --git a/man/mcycle.Rd b/man/mcycle.Rd
index 854b0d9..80e1451 100644
--- a/man/mcycle.Rd
+++ b/man/mcycle.Rd
@@ -7,9 +7,9 @@
 \alias{mcycle_test}
 \title{Data from a Simulated Motorcycle Accident}
 \format{
-An object of class \code{data.frame} with 264 rows and 2 columns.
+An object of class \code{data.frame} with 89 rows and 2 columns.
 
-An object of class \code{data.frame} with 131 rows and 2 columns.
+An object of class \code{data.frame} with 44 rows and 2 columns.
 }
 \source{
 Silverman, B. W. (1985) Some aspects of the spline smoothing approach to
diff --git a/man/popmusic.Rd b/man/popmusic.Rd
new file mode 100644
index 0000000..c5c7fdd
--- /dev/null
+++ b/man/popmusic.Rd
@@ -0,0 +1,106 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/popmusic.R
+\docType{data}
+\name{popmusic}
+\alias{popmusic}
+\alias{popmusic_train}
+\alias{popmusic_test}
+\title{Spotify data for 3 artists}
+\format{
+Audio features for each track:
+\describe{
+\item{artist}{Who is performing this music? This data contains tracks by
+Taylor Swift, Radiohead, and The National.}
+\item{acousticness}{A confidence measure from 0.0 to 1.0 of whether the track
+is acoustic. 1.0 represents high confidence the track is acoustic.}
+\item{danceability}{Danceability describes how suitable a track is for
+dancing based on a combination of musical elements including tempo, rhythm
+stability, beat strength, and overall regularity. A value of 0.0 is least
+danceable and 1.0 is most danceable.}
+\item{duration_ms}{The duration of the track in milliseconds.}
+\item{energy}{Energy is a measure from 0.0 to 1.0 and represents a
+perceptual measure of intensity and activity. Typically, energetic tracks
+feel fast, loud, and noisy. For example, death metal has high energy,
+while a Bach prelude scores low on the scale. Perceptual features
+contributing to this attribute include dynamic range, perceived loudness,
+timbre, onset rate, and general entropy.}
+\item{instrumentalness}{Predicts whether a track contains no vocals. "Ooh"
+and "aah" sounds are treated as instrumental in this context. Rap or
+spoken word tracks are clearly "vocal". The closer the instrumentalness
+value is to 1.0, the greater likelihood the track contains no vocal
+content. Values above 0.5 are intended to represent instrumental tracks,
+but confidence is higher as the value approaches 1.0.}
+\item{key}{The key the track is in. Integers map to pitches using standard
+Pitch Class notation. E.g. 0 = C, 1 = C♯/D♭, 2 = D, and so on. If no key
+was detected, the value is -1.}
+\item{liveness}{Detects the presence of an audience in the recording. Higher
+liveness values represent an increased probability that the track was
+performed live. A value above 0.8 provides strong likelihood that the
+track is live.}
+\item{loudness}{The overall loudness of a track in decibels (dB). Loudness
+values are averaged across the entire track and are useful for comparing
+relative loudness of tracks. Loudness is the quality of a sound that is
+the primary psychological correlate of physical strength (amplitude).
+Values typically range between -60 and 0 dB.}
+\item{mode}{Mode indicates the modality (major or minor) of a track, the
+type of scale from which its melodic content is derived. Major is
+represented by 1 and minor is 0.}
+\item{speechiness}{Speechiness detects the presence of spoken words in a
+track. The more exclusively speech-like the recording (e.g. talk show,
+audio book, poetry), the closer to 1.0 the attribute value. Values above
+0.66 describe tracks that are probably made entirely of spoken words.
+Values between 0.33 and 0.66 describe tracks that may contain both music
+and speech, either in sections or layered, including such cases as rap
+music. Values below 0.33 most likely represent music and other
+non-speech-like tracks.}
+\item{tempo}{The overall estimated tempo of a track in beats per minute
+(BPM). In musical terminology, tempo is the speed or pace of a given piece
+and derives directly from the average beat duration.}
+\item{time_signature}{An estimated time signature. The time signature (meter)
+is a notational convention to specify how many beats are in each bar (or
+measure). The time signature ranges from 3 to 7, indicating time signatures
+from "3/4" to "7/4".}
+\item{valence}{A measure from 0.0 to 1.0 describing the musical positiveness
+conveyed by a track. Tracks with high valence sound more positive (e.g.
+happy, cheerful, euphoric), while tracks with low valence sound more
+negative (e.g. sad, depressed, angry).}
+\item{explicit}{Does the track contain explicit lyrics that would prevent
+typical radio play (not safe for work)?}
+}
+
+An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 1269 rows and 15 columns.
+
+An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 425 rows and 15 columns.
+}
+\source{
+Rohan Alexander (2023). \emph{Telling Stories with Data: With}
+\emph{Applications in \code{R}}. CRC Press, Toronto.
+\url{https://tellingstorieswithdata.com/}.
+
+The example data are available with the textbook, originally from the
+Spotify API and described in Chapter 7.
+
+Documentation is from \href{https://developer.spotify.com/documentation/web-api/reference/get-audio-features}{Spotify API}.
+}
+\usage{
+popmusic_train
+
+popmusic_test
+}
+\description{
+Audio features for all recordings by Taylor Swift, Radiohead, and The
+National, as available in the Spotify API.
+}
+\details{
+\code{popmusic_train} is the training set.
+
+\code{popmusic_test} is the test set.
+It contains a held-out set of 425 songs.
+}
+\examples{
+popmusic_train
+
+popmusic_test
+
+}
+\keyword{datasets}
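
A note on the split in data-raw/popmusic.R: slice_sample(prop = .75, by = gr) samples 75% of the row indices within each artist, so the train/test split is stratified and both sets preserve the artists' relative proportions (the `by` argument requires dplyr >= 1.1.0, loaded here via library(tidyverse)). Below is a minimal, self-contained sketch of the same idiom on a toy tibble; the toy data and object names are illustrative only and not part of this patch.

library(dplyr)

# toy stand-in for popmusic: 120 "tracks", 40 per "artist"
toy <- tibble(
  idx = 1:120,
  gr  = factor(rep(c("A", "B", "C"), each = 40))
)

# sample 75% of rows within each level of gr (stratified sampling)
splitter <- slice_sample(toy, prop = 0.75, by = gr)

train <- toy[splitter$idx, ]
test  <- toy[setdiff(toy$idx, splitter$idx), ]

count(splitter, gr)       # 30 sampled rows per group (75% of 40)
nrow(train) + nrow(test)  # 120: every row lands in exactly one set

Splitting on row indices and taking setdiff() of the remainder, as the script does, guarantees the two sets are disjoint and together cover every track exactly once.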