An R package containing all data sets from the corpora project (https://github.com/dariusk/corpora).
install.packages("rcorpora")
Calling the corpora() function without arguments lists all
data sets in the package; calling it with the name of a data
set returns the data set itself. For example:
library(rcorpora)
corpora()
#> [1] "animals/birds_antarctica"
#> [2] "animals/birds_north_america"
#> [3] "animals/cats"
#> [4] "animals/collateral_adjectives"
#> [5] "animals/common"
#> [6] "animals/dinosaurs"
#> [7] "animals/dog_names"
#> [8] "animals/dogs"
#> [9] "animals/donkeys"
#> [10] "animals/horses"
#> [11] "animals/ponies"
#> [12] "archetypes/artifact"
#> [13] "archetypes/character"
#> [14] "archetypes/event"
#> [15] "archetypes/setting"
#> [16] "architecture/passages"
#> [17] "architecture/rooms"
#> [18] "art/isms"
#> [19] "colors/crayola"
#> [20] "colors/dulux"
#> [21] "colors/google_material_colors"
#> [22] "colors/paints"
#> [23] "colors/palettes"
#> [24] "colors/web_colors"
#> [25] "colors/xkcd"
#> [26] "corporations/cars"
#> [27] "corporations/djia"
#> [28] "corporations/fortune500"
#> [29] "corporations/industries"
#> [30] "corporations/nasdaq"
#> [31] "corporations/newspapers"
#> [32] "divination/tarot_interpretations"
#> [33] "divination/zodiac"
#> [34] "film-tv/game-of-thrones-houses"
#> [35] "film-tv/iab_categories"
#> [36] "film-tv/netflix-categories"
#> [37] "film-tv/popular-movies"
#> [38] "film-tv/tv_shows"
#> [39] "foods/apple_cultivars"
#> [40] "foods/bad_beers"
#> [41] "foods/beer_categories"
#> [42] "foods/beer_styles"
#> [43] "foods/breads_and_pastries"
#> [44] "foods/combine"
#> [45] "foods/condiments"
#> [46] "foods/curds"
#> [47] "foods/fruits"
#> [48] "foods/herbs_n_spices"
#> [49] "foods/hot_peppers"
#> [50] "foods/iba_cocktails"
#> [51] "foods/menuItems"
#> [52] "foods/pizzaToppings"
#> [53] "foods/sandwiches"
#> [54] "foods/sausages"
#> [55] "foods/scotch_whiskey"
#> [56] "foods/tea"
#> [57] "foods/vegetable_cooking_times"
#> [58] "foods/vegetables"
#> [59] "foods/wine_descriptions"
#> [60] "games/bannedGames/argentina/bannedList"
#> [61] "games/bannedGames/brazil/bannedList"
#> [62] "games/bannedGames/china/bannedList"
#> [63] "games/bannedGames/denmark/bannedList"
#> [64] "games/cluedo"
#> [65] "games/dark_souls_iii_messages"
#> [66] "games/jeopardy_questions"
#> [67] "games/pokemon"
#> [68] "games/scrabble"
#> [69] "games/street_fighter_ii"
#> [70] "games/trivial_pursuit"
#> [71] "games/wrestling_moves"
#> [72] "games/zelda"
#> [73] "geography/canada_provinces_and_territories"
#> [74] "geography/canadian_municipalities"
#> [75] "geography/countries_with_capitals"
#> [76] "geography/countries"
#> [77] "geography/english_towns_cities"
#> [78] "geography/japanese_prefectures"
#> [79] "geography/london_underground_stations"
#> [80] "geography/nationalities"
#> [81] "geography/norwegian_cities"
#> [82] "geography/nyc_neighborhood_zips"
#> [83] "geography/oceans"
#> [84] "geography/rivers"
#> [85] "geography/sf_neighborhoods"
#> [86] "geography/us_airport_codes"
#> [87] "geography/us_cities"
#> [88] "geography/us_counties"
#> [89] "geography/us_metropolitan_areas"
#> [90] "geography/us_state_capitals"
#> [91] "geography/venues"
#> [92] "geography/winds"
#> [93] "governments/mass-surveillance-project-names"
#> [94] "governments/nsa_projects"
#> [95] "governments/uk_political_parties"
#> [96] "governments/us_federal_agencies"
#> [97] "governments/us_mil_operations"
#> [98] "humans/2016_us_presidential_candidates"
#> [99] "humans/atus_activities"
#> [100] "humans/authors"
#> [101] "humans/bodyParts"
#> [102] "humans/britishActors"
#> [103] "humans/celebrities"
#> [104] "humans/descriptions"
#> [105] "humans/englishHonorifics"
#> [106] "humans/famousDuos"
#> [107] "humans/firstNames"
#> [108] "humans/lastNames"
#> [109] "humans/moods"
#> [110] "humans/norwayFirstNamesBoys"
#> [111] "humans/norwayFirstNamesGirls"
#> [112] "humans/norwayLastNames"
#> [113] "humans/occupations"
#> [114] "humans/prefixes"
#> [115] "humans/richpeople"
#> [116] "humans/scientists"
#> [117] "humans/spanishFirstNames"
#> [118] "humans/spanishLastNames"
#> [119] "humans/spinalTapDrummers"
#> [120] "humans/suffixes"
#> [121] "humans/thirdPersonPronouns"
#> [122] "humans/tolkienCharacterNames"
#> [123] "humans/us_presidents"
#> [124] "humans/wrestlers"
#> [125] "instructions/laundry_care"
#> [126] "materials/abridged-body-fluids"
#> [127] "materials/building-materials"
#> [128] "materials/carbon-allotropes"
#> [129] "materials/decorative-stones"
#> [130] "materials/fabrics"
#> [131] "materials/fibers"
#> [132] "materials/gemstones"
#> [133] "materials/layperson-metals"
#> [134] "materials/metals"
#> [135] "materials/natural-materials"
#> [136] "materials/packaging"
#> [137] "materials/plastic-brands"
#> [138] "materials/sculpture-materials"
#> [139] "materials/technical-fabrics"
#> [140] "mathematics/fibonnaciSequence"
#> [141] "mathematics/primes_binary"
#> [142] "mathematics/primes"
#> [143] "mathematics/trigonometry"
#> [144] "medicine/diagnoses"
#> [145] "medicine/drugNameStems"
#> [146] "medicine/drugs"
#> [147] "medicine/hospitals"
#> [148] "music/a_list_of_guitar_manufacturers"
#> [149] "music/bands_that_have_opened_for_tool"
#> [150] "music/female_classical_guitarists"
#> [151] "music/genres"
#> [152] "music/hamilton_musical_obcrecording_actors_characters"
#> [153] "music/instruments"
#> [154] "music/mtv_day_one"
#> [155] "music/rock_hall_of_fame"
#> [156] "music/xxl_freshman"
#> [157] "mythology/greek_gods"
#> [158] "mythology/greek_monsters"
#> [159] "mythology/greek_myths_master"
#> [160] "mythology/greek_titans"
#> [161] "mythology/hebrew_god"
#> [162] "mythology/lovecraft"
#> [163] "mythology/monsters"
#> [164] "mythology/norse_gods"
#> [165] "objects/clothing"
#> [166] "objects/corpora_winners"
#> [167] "objects/objects"
#> [168] "plants/cannabis"
#> [169] "plants/flowers"
#> [170] "plants/plants"
#> [171] "religion/christian_saints"
#> [172] "religion/fictional_religions"
#> [173] "religion/parody_religions"
#> [174] "religion/religions"
#> [175] "science/elements"
#> [176] "science/hail_size"
#> [177] "science/minor_planets"
#> [178] "science/planets"
#> [179] "science/pregnancy"
#> [180] "science/toxic_chemicals"
#> [181] "science/weather_conditions"
#> [182] "societies_and_groups/animal_welfare"
#> [183] "societies_and_groups/designated_terrorist_groups/australia"
#> [184] "societies_and_groups/designated_terrorist_groups/canada"
#> [185] "societies_and_groups/designated_terrorist_groups/china"
#> [186] "societies_and_groups/designated_terrorist_groups/egypt"
#> [187] "societies_and_groups/designated_terrorist_groups/european_union"
#> [188] "societies_and_groups/designated_terrorist_groups/india"
#> [189] "societies_and_groups/designated_terrorist_groups/iran"
#> [190] "societies_and_groups/designated_terrorist_groups/israel"
#> [191] "societies_and_groups/designated_terrorist_groups/kazakhstan"
#> [192] "societies_and_groups/designated_terrorist_groups/russia"
#> [193] "societies_and_groups/designated_terrorist_groups/saudi_arabia"
#> [194] "societies_and_groups/designated_terrorist_groups/tunisia"
#> [195] "societies_and_groups/designated_terrorist_groups/turkey"
#> [196] "societies_and_groups/designated_terrorist_groups/uae"
#> [197] "societies_and_groups/designated_terrorist_groups/ukraine"
#> [198] "societies_and_groups/designated_terrorist_groups/united_kingdom"
#> [199] "societies_and_groups/designated_terrorist_groups/united_nations"
#> [200] "societies_and_groups/designated_terrorist_groups/united_states"
#> [201] "societies_and_groups/fraternities/coeducational_fraternities"
#> [202] "societies_and_groups/fraternities/defunct"
#> [203] "societies_and_groups/fraternities/fraternities"
#> [204] "societies_and_groups/fraternities/professional"
#> [205] "societies_and_groups/fraternities/service"
#> [206] "societies_and_groups/fraternities/sororities"
#> [207] "societies_and_groups/semi_secret"
#> [208] "sports/football/epl_teams"
#> [209] "sports/football/laliga_teams"
#> [210] "sports/football/serieA"
#> [211] "sports/mlb_teams"
#> [212] "sports/nba_mvps"
#> [213] "sports/nba_teams"
#> [214] "sports/nfl_teams"
#> [215] "sports/nhl_teams"
#> [216] "sports/olympics"
#> [217] "technology/appliances"
#> [218] "technology/computer_sciences"
#> [219] "technology/fireworks"
#> [220] "technology/guns_n_rifles"
#> [221] "technology/knots"
#> [222] "technology/lisp"
#> [223] "technology/new_technologies"
#> [224] "technology/photo_sharing_websites"
#> [225] "technology/programming_languages"
#> [226] "technology/social_networking_websites"
#> [227] "technology/video_hosting_websites"
#> [228] "transportation/commercial-aircraft"
#> [229] "travel/lcc"
#> [230] "words/adjs"
#> [231] "words/adverbs"
#> [232] "words/closed_pairs"
#> [233] "words/common"
#> [234] "words/compounds"
#> [235] "words/crash_blossoms"
#> [236] "words/eggcorns"
#> [237] "words/emoji/cute_kaomoji"
#> [238] "words/emoji/emoji"
#> [239] "words/encouraging_words"
#> [240] "words/ergative_verbs"
#> [241] "words/expletives"
#> [242] "words/harvard_sentences"
#> [243] "words/infinitive_verbs"
#> [244] "words/interjections"
#> [245] "words/literature/infinitejest"
#> [246] "words/literature/lovecraft_words"
#> [247] "words/literature/mr_men_little_miss"
#> [248] "words/literature/shakespeare_phrases"
#> [249] "words/literature/shakespeare_sonnets"
#> [250] "words/literature/shakespeare_words"
#> [251] "words/literature/technology_quotes"
#> [252] "words/nouns"
#> [253] "words/oprah_quotes"
#> [254] "words/personal_nouns"
#> [255] "words/personal_pronouns"
#> [256] "words/possessive_pronouns"
#> [257] "words/prefix_root_suffix"
#> [258] "words/prepositions"
#> [259] "words/proverbs"
#> [260] "words/resume_action_words"
#> [261] "words/rhymeless_words"
#> [262] "words/spells"
#> [263] "words/state_verbs"
#> [264] "words/states_of_drunkenness"
#> [265] "words/stopwords/ar"
#> [266] "words/stopwords/bg"
#> [267] "words/stopwords/cs"
#> [268] "words/stopwords/da"
#> [269] "words/stopwords/de"
#> [270] "words/stopwords/en"
#> [271] "words/stopwords/es"
#> [272] "words/stopwords/fi"
#> [273] "words/stopwords/fr"
#> [274] "words/stopwords/gr"
#> [275] "words/stopwords/it"
#> [276] "words/stopwords/jp"
#> [277] "words/stopwords/lv"
#> [278] "words/stopwords/nl"
#> [279] "words/stopwords/no"
#> [280] "words/stopwords/pl"
#> [281] "words/stopwords/pt"
#> [282] "words/stopwords/ru"
#> [283] "words/stopwords/sk"
#> [284] "words/stopwords/sv"
#> [285] "words/stopwords/tr"
#> [286] "words/strange_words"
#> [287] "words/units_of_time"
#> [288] "words/us_president_quotes"
#> [289] "words/verbs_with_conjugations"
#> [290] "words/verbs"
#> [291] "words/word_clues/clues_five"
#> [292] "words/word_clues/clues_four"
#> [293] "words/word_clues/clues_six"
corpora("foods/pizzaToppings")
#> $description
#> [1] "A list of pizza toppings."
#>
#> $pizzaToppings
#> [1] "anchovies" "artichoke" "bacon"
#> [4] "breakfast bacon" "Canadian bacon" "cheese"
#> [7] "chicken" "chili peppers" "feta"
#> [10] "garlic" "green peppers" "grilled onions"
#> [13] "ground beef" "ham" "hot sauce"
#> [16] "meatballs" "mushrooms" "olives"
#> [19] "onions" "pepperoni" "pineapple"
#> [22] "sausage" "spinach" "sun-dried tomato"
#> [25] "tomatoes"
License: CC0