diff --git a/R/popmusic.R b/R/popmusic.R
new file mode 100644
index 0000000..cfe0541
--- /dev/null
+++ b/R/popmusic.R
@@ -0,0 +1,99 @@
+
+
+#' Spotify data for 3 artists
+#'
+#' Audio features for all recordings by Taylor Swift, Radiohead, and The
+#' National, as available in the Spotify API.
+#'
+#'
+#' @format Audio features for each track:
+#' \describe{
+#' \item{artist}{Who is performing this music? This data contains tracks by
+#' Taylor Swift, Radiohead, and The National.}
+#' \item{acousticness}{A confidence measure from 0.0 to 1.0 of whether the track
+#' is acoustic. 1.0 represents high confidence the track is acoustic.}
+#' \item{danceability}{Danceability describes how suitable a track is for
+#' dancing based on a combination of musical elements including tempo, rhythm
+#' stability, beat strength, and overall regularity. A value of 0.0 is least
+#' danceable and 1.0 is most danceable.}
+#' \item{duration_ms}{The duration of the track in milliseconds.}
+#' \item{energy}{Energy is a measure from 0.0 to 1.0 and represents a
+#' perceptual measure of intensity and activity. Typically, energetic tracks
+#' feel fast, loud, and noisy. For example, death metal has high energy,
+#' while a Bach prelude scores low on the scale. Perceptual features
+#' contributing to this attribute include dynamic range, perceived loudness,
+#' timbre, onset rate, and general entropy.}
+#' \item{instrumentalness}{Predicts whether a track contains no vocals. "Ooh"
+#' and "aah" sounds are treated as instrumental in this context. Rap or
+#' spoken word tracks are clearly "vocal". The closer the instrumentalness
+#' value is to 1.0, the greater likelihood the track contains no vocal
+#' content. Values above 0.5 are intended to represent instrumental tracks,
+#' but confidence is higher as the value approaches 1.0.}
+#' \item{key}{The key the track is in. Integers map to pitches using standard
+#' Pitch Class notation. E.g. 0 = C, 1 = C♯/D♭, 2 = D, and so on. If no key
+#' was detected, the value is -1.}
+#' \item{liveness}{Detects the presence of an audience in the recording. Higher
+#' liveness values represent an increased probability that the track was
+#' performed live. A value above 0.8 provides strong likelihood that the
+#' track is live.}
+#' \item{loudness}{The overall loudness of a track in decibels (dB). Loudness
+#' values are averaged across the entire track and are useful for comparing
+#' relative loudness of tracks. Loudness is the quality of a sound that is
+#' the primary psychological correlate of physical strength (amplitude).
+#' Values typically range between -60 and 0 dB.}
+#' \item{mode}{Mode indicates the modality (major or minor) of a track, the
+#' type of scale from which its melodic content is derived. Major is
+#' represented by 1 and minor is 0.}
+#' \item{speechiness}{Speechiness detects the presence of spoken words in a
+#' track. The more exclusively speech-like the recording (e.g. talk show,
+#' audio book, poetry), the closer to 1.0 the attribute value. Values above
+#' 0.66 describe tracks that are probably made entirely of spoken words.
+#' Values between 0.33 and 0.66 describe tracks that may contain both music
+#' and speech, either in sections or layered, including such cases as rap
+#' music. Values below 0.33 most likely represent music and other
+#' non-speech-like tracks.}
+#' \item{tempo}{The overall estimated tempo of a track in beats per minute
+#' (BPM). In musical terminology, tempo is the speed or pace of a given piece
+#' and derives directly from the average beat duration.}
+#' \item{time_signature}{An estimated time signature. The time signature (meter)
+#' is a notational convention to specify how many beats are in each bar (or
+#' measure). The time signature ranges from 3 to 7, indicating time signatures
+#' from "3/4" to "7/4".}
+#' \item{valence}{A measure from 0.0 to 1.0 describing the musical positiveness
+#' conveyed by a track. Tracks with high valence sound more positive (e.g.
+#' happy, cheerful, euphoric), while tracks with low valence sound more
+#' negative (e.g. sad, depressed, angry).}
+#' \item{explicit}{Does the track contain explicit lyrics that would prevent
+#' typical radio play (not safe for work)?}
+#' }
+#'
+#' @name popmusic
+#' @source Rohan Alexander (2023). _Telling Stories with Data: With_
+#' _Applications in `R`_. CRC Press, Toronto.
+#' <https://tellingstorieswithdata.com/>.
+#'
+#' The example data are available with the textbook, originally from the
+#' Spotify API and described in Chapter 7.
+#'
+#' Documentation is from [Spotify API](https://developer.spotify.com/documentation/web-api/reference/get-audio-features).
+#' @keywords datasets
+NULL
+
+#' @details
+#' `popmusic_train` is the training set.
+#' @rdname popmusic
+#' @examples
+#' popmusic_train
+#'
+"popmusic_train"
+
+
+#' @details
+#' `popmusic_test` is the test set.
+#' It contains a held-out set of `r nrow(popmusic_test)` songs.
+#'
+#' @rdname popmusic
+#' @examples
+#' popmusic_test
+#'
+"popmusic_test"
diff --git a/data-raw/mcycle.R b/data-raw/mcycle.R
index 2e7a446..bbe833f 100644
--- a/data-raw/mcycle.R
+++ b/data-raw/mcycle.R
@@ -1,7 +1,7 @@
 ## code to prepare `mcycle` dataset goes here
 m <- MASS::mcycle
 n <- nrow(m)
-sets <- c(0, rep(1:3, times = n - 2), 0)
+sets <- c(0, rep(1:3, length.out = n - 2), 0)
 mcycle_test <- m[sets == 2, ]
 mcycle_train <- m[sets != 2, ]
 
diff --git a/data-raw/popmusic.R b/data-raw/popmusic.R
new file mode 100644
index 0000000..5576ab7
--- /dev/null
+++ b/data-raw/popmusic.R
@@ -0,0 +1,24 @@
+## code to prepare `popmusic` dataset goes here
+library(tidyverse)
+## all are used in Rohan Alexander's "Telling Stories with Data"
+taytay <- read_rds("https://raw.githubusercontent.com/RohanAlexander/telling_stories/main/inputs/data/taylor_swift.rds")
+radiohead <- read_rds("https://raw.githubusercontent.com/RohanAlexander/telling_stories/main/inputs/data/radiohead.rds")
+the_national <- read_rds("https://raw.githubusercontent.com/RohanAlexander/telling_stories/main/inputs/data/the_national.rds")
+
+popmusic <- bind_rows(taytay, radiohead, the_national) |>
+  as_tibble() |>
+  select(artist = artist_name, danceability:tempo,
+         time_signature, duration_ms, explicit) |>
+  mutate(artist = as.factor(artist))
+
+## stratified 75/25 split: sample row indices within each artist
+idx <- 1:nrow(popmusic)
+splitter <- tibble(idx, gr = popmusic$artist) |>
+  slice_sample(prop = .75, by = gr)
+
+popmusic_train <- popmusic[splitter$idx, ]
+popmusic_test <- popmusic[setdiff(idx, splitter$idx), ]
+
+
+usethis::use_data(popmusic_train, overwrite = TRUE)
+usethis::use_data(popmusic_test, overwrite = TRUE)
diff --git a/data/mcycle_test.rda b/data/mcycle_test.rda
index 6bde113..0c71c79 100644
Binary files a/data/mcycle_test.rda and b/data/mcycle_test.rda differ
diff --git a/data/mcycle_train.rda b/data/mcycle_train.rda
index b876804..733b9dc 100644
Binary files a/data/mcycle_train.rda and b/data/mcycle_train.rda differ
diff --git a/data/popmusic_test.rda b/data/popmusic_test.rda
new file mode 100644
index 0000000..c75e8bd
Binary files /dev/null and b/data/popmusic_test.rda differ
diff --git a/data/popmusic_train.rda b/data/popmusic_train.rda
new file mode 100644
index 0000000..d1640e0
Binary files /dev/null and b/data/popmusic_train.rda differ
diff --git a/man/mcycle.Rd b/man/mcycle.Rd
index 854b0d9..80e1451 100644
--- a/man/mcycle.Rd
+++ b/man/mcycle.Rd
@@ -7,9 +7,9 @@
 \alias{mcycle_test}
 \title{Data from a Simulated Motorcycle Accident}
 \format{
-An object of class \code{data.frame} with 264 rows and 2 columns.
+An object of class \code{data.frame} with 89 rows and 2 columns.
 
-An object of class \code{data.frame} with 131 rows and 2 columns.
+An object of class \code{data.frame} with 44 rows and 2 columns.
 }
 \source{
 Silverman, B. W. (1985) Some aspects of the spline smoothing approach to
diff --git a/man/popmusic.Rd b/man/popmusic.Rd
new file mode 100644
index 0000000..c5c7fdd
--- /dev/null
+++ b/man/popmusic.Rd
@@ -0,0 +1,106 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/popmusic.R
+\docType{data}
+\name{popmusic}
+\alias{popmusic}
+\alias{popmusic_train}
+\alias{popmusic_test}
+\title{Spotify data for 3 artists}
+\format{
+Audio features for each track:
+\describe{
+\item{artist}{Who is performing this music? This data contains tracks by
+Taylor Swift, Radiohead, and The National.}
+\item{acousticness}{A confidence measure from 0.0 to 1.0 of whether the track
+is acoustic. 1.0 represents high confidence the track is acoustic.}
+\item{danceability}{Danceability describes how suitable a track is for
+dancing based on a combination of musical elements including tempo, rhythm
+stability, beat strength, and overall regularity. A value of 0.0 is least
+danceable and 1.0 is most danceable.}
+\item{duration_ms}{The duration of the track in milliseconds.}
+\item{energy}{Energy is a measure from 0.0 to 1.0 and represents a
+perceptual measure of intensity and activity. Typically, energetic tracks
+feel fast, loud, and noisy. For example, death metal has high energy,
+while a Bach prelude scores low on the scale. Perceptual features
+contributing to this attribute include dynamic range, perceived loudness,
+timbre, onset rate, and general entropy.}
+\item{instrumentalness}{Predicts whether a track contains no vocals. "Ooh"
+and "aah" sounds are treated as instrumental in this context. Rap or
+spoken word tracks are clearly "vocal". The closer the instrumentalness
+value is to 1.0, the greater likelihood the track contains no vocal
+content. Values above 0.5 are intended to represent instrumental tracks,
+but confidence is higher as the value approaches 1.0.}
+\item{key}{The key the track is in. Integers map to pitches using standard
+Pitch Class notation. E.g. 0 = C, 1 = C♯/D♭, 2 = D, and so on. If no key
+was detected, the value is -1.}
+\item{liveness}{Detects the presence of an audience in the recording. Higher
+liveness values represent an increased probability that the track was
+performed live. A value above 0.8 provides strong likelihood that the
+track is live.}
+\item{loudness}{The overall loudness of a track in decibels (dB). Loudness
+values are averaged across the entire track and are useful for comparing
+relative loudness of tracks. Loudness is the quality of a sound that is
+the primary psychological correlate of physical strength (amplitude).
+Values typically range between -60 and 0 dB.}
+\item{mode}{Mode indicates the modality (major or minor) of a track, the
+type of scale from which its melodic content is derived. Major is
+represented by 1 and minor is 0.}
+\item{speechiness}{Speechiness detects the presence of spoken words in a
+track. The more exclusively speech-like the recording (e.g. talk show,
+audio book, poetry), the closer to 1.0 the attribute value. Values above
+0.66 describe tracks that are probably made entirely of spoken words.
+Values between 0.33 and 0.66 describe tracks that may contain both music
+and speech, either in sections or layered, including such cases as rap
+music. Values below 0.33 most likely represent music and other
+non-speech-like tracks.}
+\item{tempo}{The overall estimated tempo of a track in beats per minute
+(BPM). In musical terminology, tempo is the speed or pace of a given piece
+and derives directly from the average beat duration.}
+\item{time_signature}{An estimated time signature. The time signature (meter)
+is a notational convention to specify how many beats are in each bar (or
+measure). The time signature ranges from 3 to 7, indicating time signatures
+from "3/4" to "7/4".}
+\item{valence}{A measure from 0.0 to 1.0 describing the musical positiveness
+conveyed by a track. Tracks with high valence sound more positive (e.g.
+happy, cheerful, euphoric), while tracks with low valence sound more
+negative (e.g. sad, depressed, angry).}
+\item{explicit}{Does the track contain explicit lyrics that would prevent
+typical radio play (not safe for work)?}
+}
+
+An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 1269 rows and 15 columns.
+
+An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 425 rows and 15 columns.
+}
+\source{
+Rohan Alexander (2023). \emph{Telling Stories with Data: With}
+\emph{Applications in \code{R}}. CRC Press, Toronto.
+\url{https://tellingstorieswithdata.com/}.
+
+The example data are available with the textbook, originally from the
+Spotify API and described in Chapter 7.
+
+Documentation is from \href{https://developer.spotify.com/documentation/web-api/reference/get-audio-features}{Spotify API}.
+}
+\usage{
+popmusic_train
+
+popmusic_test
+}
+\description{
+Audio features for all recordings by Taylor Swift, Radiohead, and The
+National, as available in the Spotify API.
+}
+\details{
+\code{popmusic_train} is the training set.
+
+\code{popmusic_test} is the test set.
+It contains a held-out set of 425 songs.
+}
+\examples{
+popmusic_train
+
+popmusic_test
+
+}
+\keyword{datasets}
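
A note on the split in data-raw/popmusic.R: slice_sample(prop = .75, by = gr) samples 75% of the row indices within each artist, so the train/test split is stratified and both sets preserve the artists' relative proportions (the `by` argument requires dplyr >= 1.1.0, loaded here via library(tidyverse)). Below is a minimal, self-contained sketch of the same idiom on a toy tibble; the toy data and object names are illustrative only and not part of this patch.

library(dplyr)

# toy stand-in for popmusic: 120 "tracks", 40 per "artist"
toy <- tibble(
  idx = 1:120,
  gr  = factor(rep(c("A", "B", "C"), each = 40))
)

# sample 75% of rows within each level of gr (stratified sampling)
splitter <- slice_sample(toy, prop = 0.75, by = gr)

train <- toy[splitter$idx, ]
test  <- toy[setdiff(toy$idx, splitter$idx), ]

count(splitter, gr)       # 30 sampled rows per group (75% of 40)
nrow(train) + nrow(test)  # 120: every row lands in exactly one set

Splitting on row indices and taking setdiff() of the remainder, as the script does, guarantees the two sets are disjoint and together cover every track exactly once.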