diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9e6f0a2..37b1bc9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,5 +7,9 @@ jobs: steps: - uses: actions/checkout@v3 - uses: dtolnay/rust-toolchain@stable + - name: Check out CSL styles + run: | + cd .. + git clone --depth 1 https://github.com/citation-style-language/styles - run: cargo build - - run: cargo test + - run: cargo test --features csl-json diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..3d34848 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,102 @@ +# 0.4.0 + +## Breaking changes: + +Hayagriva now uses the [Citation Style Language](https://citationstyles.org) to +encode formatting styles. This means that Hayagriva's own formatting styles have +been deprecated. + +### For users: +- The YAML input format has changed. + - Titles and formattable strings have been merged into one type. All + formattable strings can have a shorthand now. + - Formattable strings do not have `title-case` and `sentence-case` keys + anymore. `shorthand` has been renamed to `short`. To prevent changes of + the text case of formattable strings, you can use braces. Enclose a part + of a formattable string (or `short`) in `{braces}` to print it as-is. + - The fields `doi`, `isbn`, and `issn` have been moved to `serial-number` + which can now be a dictionary containing these and arbitrary other serial + numbers like a `pmid` (PubMed ID) and `arxiv` (ArXiv Identifier). + - The `tweet` entry type has been renamed to `post`. + - All numeric variables can now also contain strings. Numbers can have + string affixes. + +Refer to the updated +[file format](https://github.com/typst/hayagriva/blob/main/docs/file-format.md) +docs for examples. + +### For developers: +- To use a CSL style, you can either supply a CSL file or use an archive of + provided styles with the `archive` feature. 
+- The `from_yaml_str` function will now return the new `Library` struct, with the + entries within. +- The `Database` struct has been replaced by the easier-to-handle + `BibliographyDriver`. +- We switched from `yaml_rust` to `serde_yaml`. The `Entry` struct now implements + `serde`'s `Serialize` and `Deserialize` traits. Hence, the `from_yaml` and + `to_yaml` functions have been deleted. +- Brackets are no longer individually overridable. Instead, use the new + `CitePurpose`. +- `Entry::kind` has been renamed to `Entry::entry_type`. + - The citation styles `AuthorTitle` and `Keys` have been removed but can be + realized with CSL. + +This release fixes many bugs and makes Hayagriva a serious contender for +reference management. + +## Other changes + +- We added the entry types `Performance` and `Original`. +- We added the field `call-number`. + + +# 0.3.2 + +Fixes a title case formatting bug introduced in the previous release. + +# 0.3.1 + +_Bug Fixes:_ +- Added an option to turn off abbreviation of journals (thanks to @CMDJojo) +- Fixed bugs with title case formatting (thanks to @jmskov) +- Fixed off-by-one error with dates in APA style (thanks to @bluebear94) +- Fixed supplements in the Alphanumeric and AuthorTitle styles (thanks to @lynn) +- Fixed bugs with sentence case formatting +- Fixed `verbatim` option +- Fixed terminal formatting +- Fixed some typos (thanks to @kianmeng and @bluebear94) + +# 0.3.0 + +*Breaking:* +- Updated to `biblatex` 0.8.0 + +*Bug Fixes:* +- Fixed string indexing for titles, removed panic +- More permissive BibLaTeX parsing + +# 0.2.1 + +*Bug Fixes:* +- Fixed APA bibliography ordering + +# 0.2.0 + +*Breaking:* +- Replaced `NoHyphenation` formatting with `Link` formatting +- Switched to newest BibLaTeX (which is part of the public API) + +*Bug Fixes:* +- Fixed IEEE bibliography ordering +- Fixed A, B, C, ... 
suffixes for Author Date citations +- Removed `println` calls + +# 0.1.1 + +🐞 This release fixes the documentation of the CLI in the `README.md` file. +✨ There are new options for bracketed citations in the CLI. +✅ No breaking changes. + +# 0.1.0 + +🎉 This is the initial release! \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 687874d..c551c06 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,8 +1,8 @@ [package] name = "hayagriva" -version = "0.3.2" +version = "0.4.0" authors = ["Martin Haug "] -edition = "2018" +edition = "2021" license = "MIT OR Apache-2.0" description = "Work with references: Literature database management, storage, and citation formatting" repository = "https://github.com/typst/hayagriva" @@ -11,25 +11,37 @@ categories = ["template-engine", "value-formatting", "command-line-utilities"] keywords = ["bibliography", "citation", "reference", "bibtex", "literature"] [features] -default = ["biblatex"] -cli = ["clap"] +default = ["biblatex", "archive"] +cli = ["clap", "strum"] +archive = ["rkyv", "ciborium"] +csl-json = ["citationberg/json"] [dependencies] -biblatex = { version = "0.8", optional = true } -clap = { version = "3.1", optional = true, features = ["cargo"] } -chrono = { version = "0.4", default-features = false } -isolang = "2.1" -lazy_static = "1.4.0" -linked-hash-map = "0.5.3" -paste = "1.0.0" -regex = { version = "1.4", default-features = false, features = ["std", "unicode-perl"] } -strum = { version = "0.24", features = ["derive"] } -thiserror = "1.0.20" -unic-langid = "0.9.0" +citationberg = { version = "0.1" } +indexmap = { version = "2.0.2", features = ["serde"] } +numerals = "0.1.4" +paste = "1.0.14" +serde = { version = "1", features = ["derive"] } +serde_yaml = "0.9.25" +thiserror = "1.0.48" +unic-langid = { version = "0.9.0", features = ["serde"] } unicode-segmentation = "1.6.0" -url = "2.1.1" -yaml-rust = "0.4.4" +unscanny = "0.1.0" +url = { version = "2.4", features = ["serde"] } +biblatex = { version = 
"0.8.1", optional = true } +ciborium = { version = "0.2.1", optional = true } +clap = { version = "3.1", optional = true, features = ["cargo"] } +rkyv = { version = "0.7.42", optional = true } +strum = { version = "0.24", features = ["derive"], optional = true } + +[dev-dependencies] +serde_json = "1" [[bin]] name = "hayagriva" required-features = ["cli"] + +[[test]] +name = "citeproc" +path = "tests/citeproc.rs" +required-features = ["csl-json"] diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..4cfe2d3 --- /dev/null +++ b/NOTICE @@ -0,0 +1,8 @@ +Licenses for third party components used by this project can be found below. + +================================================================================ +The Creative Commons BY-SA 3.0 DEED License applies to: +* The CSL styles found in `tests/data/*` +* The CSL styles and locales found in `styles.cbor.rkyv` + +https://creativecommons.org/licenses/by-sa/3.0/ \ No newline at end of file diff --git a/README.md b/README.md index 6dae9dc..5285ea9 100644 --- a/README.md +++ b/README.md @@ -24,34 +24,14 @@ to install and use Hayagriva on your terminal. ## Supported styles -- Institute of Electrical and Electronics Engineers (IEEE) - - References - - Numerical citations -- Modern Language Association (MLA), 8th edition of the MLA Handbook - - "Works Cited" references -- Chicago Manual of Style (CMoS), 17th edition - - Notes and Bibliography - - Author-Date references and citations -- American Psychological Association (APA), 7th edition of the APA Publication Manual - - References -- Other in-text citation styles - - Alphanumerical (e. g. "Rass97") - - Author Title - -## Usage - -Add this to your `Cargo.toml`: -```toml -[dependencies] -hayagriva = "0.3" -``` +Hayagriva supports all styles provided in the +[official Citation Style Language repository](https://github.com/citation-style-language/styles), +currently over 2,600. 
-Below, there is an example of how to parse a YAML database and get a Modern -Language Association-style citation. +# Usage ```rust use hayagriva::io::from_yaml_str; -use hayagriva::style::{Database, Mla}; let yaml = r#" crazy-rich: @@ -65,20 +45,46 @@ crazy-rich: // Parse a bibliography let bib = from_yaml_str(yaml).unwrap(); -assert_eq!(bib[0].date().unwrap().year, 2014); +assert_eq!(bib.get("crazy-rich").unwrap().date().unwrap().year, 2014); // Format the reference -let db = Database::from_entries(bib.iter()); -let mut mla = Mla::new(); -let reference = db.bibliography(&mut mla, None); -assert_eq!(reference[0].display.value, "Kwan, Kevin. Crazy Rich Asians. Anchor Books, 2014."); +use std::fs; +use hayagriva::{ + BibliographyDriver, BibliographyRequest, BufWriteFormat, + CitationItem, CitationRequest, +}; +use hayagriva::citationberg::{LocaleFile, IndependentStyle}; + +let en_locale = fs::read_to_string("tests/data/locales-en-US.xml").unwrap(); +let locales = [LocaleFile::from_xml(&en_locale).unwrap().into()]; + +let style = fs::read_to_string("tests/data/art-history.csl").unwrap(); +let style = IndependentStyle::from_xml(&style).unwrap(); + +let mut driver = BibliographyDriver::new(); + +for entry in bib.iter() { + let items = vec![CitationItem::with_entry(entry)]; + driver.citation(CitationRequest::from_items(items, &style, &locales)); +} + +let result = driver.finish(BibliographyRequest { + style: &style, + locale: None, + locale_files: &locales, +}); + +for cite in result.citations { + println!("{}", cite.citation.to_string()) +} ``` -Formatting for in-text citations is available through implementors of the -`hayagriva::style::CitationStyle` trait whereas bibliographies can be created by -`hayagriva::style::BibliographyStyle`. Both traits are used through -`style::Database` which provides methods to format its records as bibliographies -and citations. +To format entries, you need to wrap them in a `CitationRequest`. 
Each of these +can reference multiple entries in their respective `CitationItem`s. +Use these with a `BibliographyDriver` to obtain formatted citations and bibliographies. + +You can either supply your own CSL files or choose from about 100 bundled +citation styles using the `archive` feature. If the default features are enabled, Hayagriva supports BibTeX and BibLaTeX bibliographies. You can use `io::from_biblatex_str` to parse such @@ -133,7 +139,7 @@ quantized-vortex: let entries = from_yaml_str(yaml).unwrap(); let journal = select!((Article["date"]) > ("journal":Periodical)); -assert!(journal.matches(&entries[0])); +assert!(journal.matches(entries.nth(0).unwrap())); ``` There are two ways to check if a selector matches an entry. @@ -164,7 +170,8 @@ dependence: title: The program dependence graph and its use in optimization author: ["Ferrante, Jeanne", "Ottenstein, Karl J.", "Warren, Joe D."] date: 1987-07 - doi: "10.1145/24039.24041" + serial-number: + doi: "10.1145/24039.24041" parent: type: Periodical title: ACM Transactions on Programming Languages and Systems @@ -254,12 +261,10 @@ tab. We would also be very happy to accept PRs for bug fixes, minor refactorings, features that were requested in the issues and greenlit by us, as well as the planned features listed below: -- More citation and reference styles (especially styles used in the 'hard' - sciences would be incredibly appreciated) - Implementing the YAML-to-BibLaTeX conversion -- Improvements to the sentence and title formatter -- Work for non-English bibliographies - Documentation improvements +- CSL bugfixes +- CSL-M Support We wish to thank each and every prospective contributor for the effort you (plan to) invest in this project and for adopting it! @@ -271,3 +276,9 @@ Hayagriva is licensed under a MIT / Apache 2.0 dual license. 
Users and consumers of the library may choose which of those licenses they want to apply whereas contributors have to accept that their code is in compliance and distributed under the terms of both of these licenses. + +Hayagriva includes CSL styles that are licensed as CC-BY-SA 3.0 Deed if the +`archive` feature is enabled. The file `styles.cbor.rkyv` is a collection of +these works and falls under this license. Retrieve attribution information by +deserializing it using the `styles` function and reading the `StyleInfo` +structs. diff --git a/docs/file-format.md b/docs/file-format.md index 09cb6fe..2ec887a 100644 --- a/docs/file-format.md +++ b/docs/file-format.md @@ -150,7 +150,7 @@ This section lists all possible fields and data types for them. | | | |------------------|-----------------------------------------------------------| -| **Data type:** | title | +| **Data type:** | formattable string | | **Description:** | title of the item | | **Example:** | `title: Rick Astley: How An Internet Joke Revived My Career` | @@ -194,6 +194,14 @@ This section lists all possible fields and data types for them. | **Description:** | persons involved with the item that do not fit `author` or `editor` | | **Example:** |
affiliated:
- role: Director
names: Cameron, James
- role: CastMember
names: ["Schwarzenegger, Arnold", "Hamilton, Linda", "Patrick, Robert"]
| +#### `call-number` + +| | | +|------------------|-----------------------------------------------------------| +| **Data type:** | formattable string | +| **Description:** | The number of the item in a library, institution, or collection. Use with `archive`.| +| **Example:** | `call-number: "F16 D14"` | + #### `publisher` | | | @@ -214,7 +222,7 @@ This section lists all possible fields and data types for them. | | | |------------------|-----------------------------------------------------------| -| **Data type:** | string | +| **Data type:** | formattable string | | **Description:** | Organization at/for which the item was produced | | **Example:** | `organization: Technische Universität Berlin` | @@ -222,7 +230,7 @@ This section lists all possible fields and data types for them. | | | |------------------|-----------------------------------------------------------| -| **Data type:** | integer or string | +| **Data type:** | numeric or string | | **Description:** | For an item whose parent has multiple issues, indicates the position in the issue sequence. Also used to indicate the episode number for TV. | | **Example:** | `issue: 5` | @@ -230,7 +238,7 @@ This section lists all possible fields and data types for them. | | | |------------------|-----------------------------------------------------------| -| **Data type:** | integer or range of integers | +| **Data type:** | numeric or string | | **Description:** | For an item whose parent has multiple volumes/parts/seasons ... of which this item is one | | **Example:** | `volume: 2-3` | @@ -238,7 +246,7 @@ This section lists all possible fields and data types for them. | | | |------------------|-----------------------------------------------------------| -| **Data type:** | integer | +| **Data type:** | numeric | | **Description:** | Total number of volumes/parts/seasons this item consists of | | **Example:** | `volume-total: 12` | @@ -246,7 +254,7 @@ This section lists all possible fields and data types for them. 
| | | |------------------|-----------------------------------------------------------| -| **Data type:** | integer or string | +| **Data type:** | numeric or string | | **Description:** | published version of an item | | **Example:** | `edition: expanded and revised edition` | @@ -254,7 +262,7 @@ This section lists all possible fields and data types for them. | | | |------------------|-----------------------------------------------------------| -| **Data type:** | integer _(single page)_ or integer range | +| **Data type:** | numeric or string | | **Description:** | the range of pages within the parent this item occupies | | **Example:** | `page-range: 812-847` | @@ -262,7 +270,7 @@ This section lists all possible fields and data types for them. | | | |------------------|-----------------------------------------------------------| -| **Data type:** | integer | +| **Data type:** | numeric | | **Description:** | total number of pages the item has | | **Example:** | `page-total: 1103` | @@ -302,25 +310,9 @@ This section lists all possible fields and data types for them. 
| | | |------------------|-----------------------------------------------------------| -| **Data type:** | string | -| **Description:** | any serial number or version describing the item that is not appropriate for the fields `doi`, `edition`, `isbn` or `issn` (may be assigned by the author of the item; especially useful for preprint archives) | -| **Example:** | `serial-number: 2003.13722` | - -#### `isbn` - -| | | -|------------------|-----------------------------------------------------------| -| **Data type:** | string | -| **Description:** | International Standard Book Number (ISBN), prefer ISBN-13 | -| **Example:** | `isbn: 978-0-20189683-1` | - -#### `issn` - -| | | -|------------------|-----------------------------------------------------------| -| **Data type:** | string | -| **Description:** | International Standard Serial Number (ISSN) | -| **Example:** | `issn: 0014-1704` | +| **Data type:** | string or dictionary of strings | +| **Description:** | Any serial number. If you have serial numbers of well-known schemes like `doi`, you can put them into the serial number as a dictionary like in the second example. Hayagriva will recognize and specially treat `doi`, `isbn`, `issn`, `pmid`, `pmcid`, and `arxiv`. | +| **Example:** | `serial-number: 2003.13722` or
serial-number:
doi: "10.22541/au.148771883.35456290"
arxiv: "1906.00356"
| #### `language` @@ -350,7 +342,7 @@ This section lists all possible fields and data types for them. | | | |------------------|-----------------------------------------------------------| -| **Data type:** | string | +| **Data type:** | formattable string | | **Description:** | additional description to be appended after reference list entry | | **Example:** | `note: microfilm version` | @@ -378,8 +370,10 @@ Needs a keyword with one of the following values: - `newspaper`. The issue of a newspaper that was published on a given day. - `legislation`. Legal document or draft thereof that is, is to be, or was to be enacted into binding law (default parent: `anthology`). - `manuscript`. Written document that is submitted as a candidate for publication. -- `tweet`. A post on a micro-blogging platform like Twitter (default parent: `tweet`). +- `original`. The original container of the entry before it was re-published. +- `post`. A post on a micro-blogging platform like Twitter (default parent: `post`). - `misc`. Items that do not match any of the other Entry type composites. +- `performance`. A live artistic performance. - `periodical`. A publication that periodically publishes issues with unique content. This includes scientific journals and news magazines. - `proceedings`. The official published record of the events at a professional conference. - `book`. Long-form work published physically as a set of bound sheets. @@ -397,49 +391,46 @@ The field is case insensitive. It defaults to `Misc` or the default parent if th #### Formattable String -A formattable string is a string that may run through a sentence or title case transformer when used in a reference or citation. You can disable these transformations or provide your own title and sentence case versions of the string. +A formattable string is a string that may run through a text case transformer when used in a reference or citation. You can disable these transformations on segments of the string or the whole string. 
-The simplest scenario for a formattable string is to provide a string: +The simplest scenario for a formattable string is to provide a string that can be case-folded: ```yaml publisher: UN World Food Programme ``` -To disable formatting altogether and instead preserve the casing as it appears in the source string, put the string in the `value` sub-field and specify another sub-field as `verbatim: true`: +If you want to preserve a part of the string but want to go with the style's +behavior otherwise, enclose that part in braces like below. You must wrap the +whole string in quotes if you do this. ```yaml -publisher: - value: UN World Food Programme - verbatim: true +publisher: "{imagiNary} Publishing" ``` -If you instead want to provide a custom sentence- or title-cased version of the string, you can write them in their own sub-fields (note that the sub-field value with the canonical name always has to be specified): + +To disable formatting altogether and instead preserve the casing as it appears +in the source string, put the string in the `value` sub-field and specify +another sub-field as `verbatim: true`: ```yaml publisher: - value: imagiNary Publishing - title-case: Imaginary Publishing - sentence-case: imagiNary publishing + value: UN World Food Programme + verbatim: true ``` -A `sentence-case` or `title-case` sub-field will take precedence over `verbatim`. +Title and sentence case folding will always be deactivated if your item has set +the `language` key to something other than English. -#### Title +You can also include mathematical markup evaluated by [Typst](https://typst.app) by +wrapping it in dollar signs. -A title is a formattable string that can have two additional sub-fields: `translation` (the translated name of the item) and `shorthand` (shortened name of the item used if a citation style requires it). Both of these fields are formattable strings themselves. Just like a formattable string, a title can be just a string. 
+Furthermore, every formattable string can include a short form that a citation +style can choose to render over the longer form. ```yaml -title: The Nutcracker -``` - -Example with translation and shorthand: - -```yaml -title: - value: Щелкунчик - verbatim: true - translation: The Nutcracker - shorthand: Nutcracker +journal: + value: International Proceedings of Customs + short: Int. Proc. Customs ``` #### Person @@ -523,13 +514,15 @@ time-range: "03:35:21-03:58:46" Strings are sequences of characters as a field value. In most cases you can write your string after the colon, but if it contains a special character (`:`, `{`, `}`, `[`, `]`, `,`, `&`, `*`, `#`, `?`, `|`, `-`, `<`, `>`, `=`, `!`, `%`, `@`, `\`) it should be wrapped with double-quotes. If your string contains double-quotes, you can write those as this escape sequence: `\"`. If you instead wrap your string in single quotes, most YAML escape sequences such as `\n` for a line break will be ignored. -#### Integer - -Integers are whole numbers that can be negative, e. g. `53789` or `-3`. +#### Numeric -#### Integer range +Numeric variables are one or more numbers that are delimited by commas, +ampersands, and hyphens. Numeric variables can express a single number or a +range and contain only integers, but may contain negative numbers. Numeric variables can have a non-numeric prefix and suffix. -Integer ranges are two integers within a string, separated by a hyphen and optionally spaces (`6 - 18`). Both integers must be positive. +```yaml +page-range: S10-15 +``` #### Unicode Language Identifier diff --git a/docs/selectors.md b/docs/selectors.md index 1ba7a78..ee94b3c 100644 --- a/docs/selectors.md +++ b/docs/selectors.md @@ -93,7 +93,7 @@ This selects a chapter in a monograph (a long-form text on a subject published i _Less interesting for CLI users._ Once you know that a matching entry has some parents, you might need to access their fields. You can be sure you got the right parent with bindings. 
-Create a binding using the `:`-operator. To the left of the selector, you provide a name which you can freely choose. The parent (or the top-level entry) that matches the selector to the right will then appear in the resulting map under the key that you have chosen. +Create a binding using the `:` operator. To the left of the selector, you provide a name which you can freely choose. The parent (or the top-level entry) that matches the selector to the right will then appear in the resulting map under the key that you have chosen. | Variant | Example | |-------------|------------------------------------------------------| diff --git a/src/csl/archive.rs b/src/csl/archive.rs new file mode 100644 index 0000000..f5191db --- /dev/null +++ b/src/csl/archive.rs @@ -0,0 +1,111 @@ +//! Optional archive of included CSL styles. + +use citationberg::{Locale, Style}; +use rkyv::{Archive, Deserialize, Serialize}; +use serde::de::DeserializeOwned; +use std::collections::{BTreeMap, HashMap}; + +#[repr(align(8))] +struct Data(T); + +static ARCHIVE: &Data<[u8]> = &Data(*include_bytes!("../../styles.cbor.rkyv")); + +/// In-memory representation of a CSL archive. +#[derive(Debug, Clone, Archive, Serialize, Deserialize)] +pub struct Lookup { + /// Maps from a CSL style name to an index into the `styles` vector. + pub map: BTreeMap, + /// Maps from a CSL ID to an index into the `styles` vector. + pub id_map: HashMap, + /// The CSL styles in the archive as CBOR-encoded bytes. + pub styles: Vec>, + /// The locales in the archive as CBOR-encoded bytes. + pub locales: Vec>, +} + +/// A match between a style name and a style. +#[derive(Debug, Clone, Archive, Serialize, Deserialize, PartialEq, Eq)] +pub struct StyleMatch { + /// A full, descriptive name of the style. + pub full_name: String, + /// Whether this is an alias. + pub alias: bool, + /// Whether the style contains a bibliography. + pub bibliography: bool, + /// The style index. 
+ pub index: usize, +} + +impl StyleMatch { + /// Create a new style match. + pub fn new(full_name: String, alias: bool, bibliography: bool, index: usize) -> Self { + Self { full_name, alias, bibliography, index } + } +} + +/// Read an archive +fn read() -> &'static ::Archived { + unsafe { rkyv::archived_root::(&ARCHIVE.0) } +} + +/// An archived CSL style. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ArchiveStyle { + /// Name of the style. + pub name: &'static str, + /// A full, descriptive name of the style. + pub full_name: &'static str, + /// Whether this is an alias. + pub alias: bool, + index: u32, +} + +impl ArchiveStyle { + /// Whether a style is an alias of another style. + pub fn is_alias(self, other: Self) -> bool { + self.index == other.index + } +} + +/// Retrieve a list of styles. +pub fn styles() -> impl Iterator { + read().map.iter().map(|(k, v)| ArchiveStyle { + name: k.as_str(), + index: v.index, + full_name: &v.full_name, + alias: v.alias, + }) +} + +/// Retrieve a style from the archive +pub fn style(s: ArchiveStyle) -> Style { + from_cbor:: \ No newline at end of file diff --git a/tests/archiver.rs b/tests/archiver.rs new file mode 100644 index 0000000..b5a38d0 --- /dev/null +++ b/tests/archiver.rs @@ -0,0 +1,480 @@ +#![cfg(feature = "rkyv")] +use citationberg::Style; +use citationberg::{Locale, LocaleFile, XmlError}; +use rkyv::Archive; +use serde::de::DeserializeOwned; +use serde::Serialize; +use std::collections::HashSet; +use std::collections::{BTreeMap, HashMap}; +use std::fmt; +use std::fs; +use std::io; +use std::path::PathBuf; + +use hayagriva::archive::{Lookup, StyleMatch}; + +mod common; +use common::{ensure_repo, iter_files, iter_files_with_name}; + +use crate::common::CACHE_PATH; + +const STYLES_REPO_NAME: &str = "styles"; +const CSL_REPO: &str = "https://github.com/citation-style-language/styles"; +const LOCALES_REPO: &str = "https://github.com/citation-style-language/locales"; +const LOCALES_REPO_NAME: &str = 
"locales"; +const ARCHIVE_NAME: &str = "styles.cbor.rkyv"; +const OWN_STYLES: &str = "styles"; + +/// Ensure the CSL repos are available, create an archive, and validate it. +#[test] +fn try_archive() { + ensure_repos().unwrap(); + + // Create the archive if it does not exist + let created = if !PathBuf::from(ARCHIVE_NAME).exists() { + create_archive().unwrap(); + true + } else { + false + }; + + match (created, validate_archive()) { + (true, Err(e)) => panic!("{:?}", e), + (false, Err(_)) => create_archive().unwrap(), + (_, Ok(())) => {} + } +} + +/// Always archive. +#[test] +#[ignore] +fn always_archive() { + ensure_repos().unwrap(); + create_archive().unwrap(); + validate_archive().unwrap(); +} + +/// Download the CSL styles and locales repos. +fn ensure_repos() -> Result<(), ArchivalError> { + ensure_repo(CSL_REPO, STYLES_REPO_NAME, "master")?; + Ok(ensure_repo(LOCALES_REPO, LOCALES_REPO_NAME, "master")?) +} + +/// Create an archive of CSL and its locales as CBOR. +fn create_archive() -> Result<(), ArchivalError> { + let style_path = PathBuf::from(CACHE_PATH).join(STYLES_REPO_NAME); + let own_style_path = PathBuf::from(OWN_STYLES); + let mut res = Lookup { + map: BTreeMap::new(), + id_map: HashMap::new(), + styles: Vec::new(), + locales: retrieve_locales()?, + }; + + for path in iter_files(&style_path, "csl").chain(iter_files(&own_style_path, "csl")) { + let style: Style = Style::from_xml(&fs::read_to_string(path)?)?; + let Style::Independent(indep) = &style else { + continue; + }; + + if STYLE_IDS.binary_search(&indep.info.id.as_str()).is_err() { + continue; + } + + let bytes = to_cbor_vec(&style)?; + let idx = res.styles.len(); + let id = strip_id(indep.info.id.as_str()); + + let rides = OVERRIDES; + let over = rides.iter().find(|o| o.id == id); + let name = clean_name(id, over); + + println!("\n{};{}", &id, &name); + + let mut insert = |name: &str, alias: bool| { + if res + .map + .insert( + name.to_string(), + StyleMatch::new( + 
indep.info.title.value.to_string(), + alias, + indep.bibliography.is_some(), + idx, + ), + ) + .is_some() + { + panic!("duplicate name {} ({})", name, idx); + } + + if !alias { + res.id_map.insert(indep.info.id.clone(), idx); + } + }; + + insert(&name, false); + + for alias in over.and_then(|o| o.alias.as_ref()).iter().flat_map(|a| a.iter()) { + insert(alias, true); + } + + res.styles.push(bytes); + } + + assert_eq!(res.styles.len(), STYLE_IDS.len()); + + let bytes = rkyv::to_bytes::<_, 1024>(&res).expect("failed to serialize vec"); + fs::write("styles.cbor.rkyv", bytes)?; + + Ok(()) +} + +fn clean_name(id: &str, over: Option<&Override>) -> String { + if let Some(name) = over.and_then(|o| o.main) { + name.to_string() + } else { + id.trim_end_matches("-journals") + .trim_end_matches("-publications") + .trim_end_matches("-brackets") + .trim_end_matches("-group") + .trim_end_matches("-bibliography") + .to_string() + } +} + +/// Retrieve all available CSL locales. +fn retrieve_locales() -> Result>, ArchivalError> { + let mut res = Vec::new(); + let locales_path = PathBuf::from(CACHE_PATH).join(LOCALES_REPO_NAME); + for path in iter_files_with_name(&locales_path, "xml", |n| n.starts_with("locales-")) + { + let xml = fs::read_to_string(path)?; + let locale: Locale = LocaleFile::from_xml(&xml)?.into(); + let bytes = to_cbor_vec(&locale)?; + res.push(bytes); + } + + Ok(res) +} + +/// Check whether all desired styles are available and correctly encoded. +fn validate_archive() -> Result<(), ArchivalError> { + let archive_file = fs::read("styles.cbor.rkyv")?; + if archive_file.is_empty() { + return Err(ArchivalError::ValidationError("empty file".to_string())); + } + let archive = unsafe { read(&archive_file) }; + + // Check that every archive entry maps to a style. 
+ for (k, idx) in archive.map.iter().filter(|(_, v)| !v.alias) { + let bytes = archive + .styles + .get(idx.index as usize) + .ok_or_else(|| ArchivalError::ValidationError(k.to_string()))?; + let style = from_cbor:: \ No newline at end of file diff --git a/tests/basic.yml b/tests/data/basic.yml similarity index 91% rename from tests/basic.yml rename to tests/data/basic.yml index 15456df..f058591 100644 --- a/tests/basic.yml +++ b/tests/data/basic.yml @@ -9,13 +9,14 @@ zygos: verbatim: true date: 2017 page-range: 325-341 - doi: 10.1145/3132747.3132780 + serial-number: + doi: 10.1145/3132747.3132780 parent: - type: Proceedings - title: Proceedings of the 26th Symposium on Operating Systems Principles - publisher: Association for Computing Machinery - location: New York, NY, USA - isbn: 978-1450350853 + - type: Proceedings + title: Proceedings of the 26th Symposium on Operating Systems Principles + publisher: Association for Computing Machinery + location: New York, NY, USA + serial-number: { isbn: "978-1450350853" } wwdc-network: type: Article @@ -25,9 +26,8 @@ wwdc-network: parent: - type: Conference title: - value: World Wide Developer Conference 2020 - verbatim: true - shorthand: WWDC 2020 + value: "{World Wide Developer Conference 2020}" + short: WWDC 2020 organization: Apple Inc. location: Mountain View, CA - type: Video @@ -68,7 +68,7 @@ swedish: editor: Brown, George C. title: value: "A Swedish Traveller in Early Wisconcin: The Observations of Fredrika Bremer" - shorthand: Swedish Traveller + short: Swedish Traveller volume: 1-2 parent: type: Periodical @@ -86,9 +86,9 @@ harry: author: Rowling, J. K. 
volume: 5 volume-total: 7 - isbn: 978-0747551003 + serial-number: { isbn: 978-0747551003 } page-total: 768 - date: 2003-06-21 + date: ~2003-06-21 science-e-issue: type: Web @@ -100,7 +100,10 @@ science-e-issue: given-name: "Laurenz" alias: "laurmaedje" date: 2020-07-18 - parent: { type: Repository, title: Typst, url: https://github.com/typst/typst } + parent: + type: Repository + title: Typst + url: https://github.com/typst/typst terminator-2: type: Video @@ -151,7 +154,8 @@ kinetics: type: Article title: Kinetics and luminescence of the excitations of a nonequilibrium polariton condensate author: ["Doan, T. D.", "Tran Thoai, D. B.", "Haug, Hartmut"] - doi: 10.1103/PhysRevB.102.165126 + serial-number: + doi: 10.1103/PhysRevB.102.165126 page-total: 13 page-range: 165126-165139 date: 2020-10-14 @@ -167,9 +171,10 @@ house: type: Article title: Teaching medicine with the help of "Dr. House" author: ["Jerrentrup, Andreas", "Mueller, Tobias", "Glowalla, Ulrich", "Herder, Meike", "Henrichs, Nadine", "Neubauer, Andreas", "Schaefer, Juergen R."] - doi: 10.1371/journal.pone.0193972 + serial-number: + doi: 10.1371/journal.pone.0193972 + serial: e0193972 date: 2018-03-13 - serial-number: e0193972 parent: title: value: PLoS ONE @@ -230,7 +235,7 @@ gedanken: editor: ["Shannon, C. E.", "McCarthy, J."] volume: 34 date: 1956-04 - isbn: 978-0-691-07916-5 + serial-number: { isbn: 978-0-691-07916-5 } location: Princeton, NJ, USA publisher: NBS parent: @@ -238,7 +243,7 @@ gedanken: title: Annals of Mathematics Studies georgia: - type: Tweet + type: post author: Silver, Nate title: Trump's claim to have won Georgia is highly dubious. No network has called it. He's only ahead by 2.5 points there, and the outstanding votes are mostly mail votes in very blue counties, likely very Democratic. Biden may even be a slight favorite there. 
url: https://twitter.com/NateSilver538/status/1323889051037028353 @@ -294,7 +299,7 @@ un-hdr: author: United Nations Development Programme title: Human Development Report 2019 location: New York - issn: 2412-3129 + serial-number: { issn: 2412-3129 } url: http://hdr.undp.org/sites/default/files/hdr2019.pdf date: 2019 @@ -376,7 +381,8 @@ overleaf: latex-users: type: Article title: How many scholarly articles are written in LaTeX? - doi: 10.22541/au.148771883.35456290 + serial-number: + doi: 10.22541/au.148771883.35456290 author: Pepe, Alberto date: 2017-02-21 parent: diff --git a/tests/data/locales-en-US.xml b/tests/data/locales-en-US.xml new file mode 100644 index 0000000..537a3ae --- /dev/null +++ b/tests/data/locales-en-US.xml @@ -0,0 +1,656 @@ + + + + + Andrew Dunning + + + Sebastian Karcher + + + Rintze M. Zelle + + + Denis Meier + + + Brenton M. Wiernik + + This work is licensed under a + Creative Commons Attribution-ShareAlike 3.0 License + 2015-10-10T23:31:02+00:00 + + + + + + + + + + + + + + advance online publication + album + audio recording + film + henceforth + loc. cit. + no place + n.p. + no publisher + n.p. + on + op. cit. + original work published + personal communication + podcast + podcast episode + preprint + radio broadcast + radio series + radio series episode + special issue + special section + television broadcast + television series + television series episode + video + working paper + accessed + and + and others + anonymous + anon. + at + available at + by + circa + c. + cited + + edition + editions + + ed. + et al. + forthcoming + from + ibid. + in + in press + internet + letter + no date + n.d. + online + presented at the + + reference + references + + + ref. + refs. 
+ + retrieved + scale + version + + + preprint + journal article + magazine article + newspaper article + bill + + broadcast + + classic + collection + dataset + document + entry + dictionary entry + encyclopedia entry + event + + graphic + hearing + interview + legal case + legislation + manuscript + map + video recording + musical score + pamphlet + conference paper + patent + performance + periodical + personal communication + post + blog post + regulation + report + review + book review + software + audio recording + presentation + standard + thesis + treaty + webpage + + + journal art. + mag. art. + newspaper art. + + + doc. + + graph. + interv. + MS + video rec. + rep. + rev. + bk. rev. + audio rec. + + + AD + BC + BCE + CE + + + + + + + + : + , + ; + + + th + st + nd + rd + th + th + th + + + first + second + third + fourth + fifth + sixth + seventh + eighth + ninth + tenth + + + + act + acts + + + appendix + appendices + + + article + articles + + + canon + canons + + + location + locations + + + equation + equations + + + rule + rules + + + scene + scenes + + + table + tables + + + + + + + title + titles + + + book + books + + + chapter + chapters + + + column + columns + + + figure + figures + + + folio + folios + + + number + numbers + + + line + lines + + + note + notes + + + opus + opera + + + page + pages + + + page + pages + + + paragraph + paragraphs + + + part + parts + + + section + sections + + + sub verbo + sub verbis + + + verse + verses + + + volume + volumes + + + + + app. + apps. + + + art. + arts. + + + loc. + locs. + + + eq. + eqs. + + + r. + rr. + + + sc. + scs. + + + tbl. + tbls. + + + + + + + tit. + tits. + + + bk. + bks. + + + chap. + chaps. + + + col. + cols. + + + fig. + figs. + + + fol. + fols. + + + no. + nos. + + + l. + ll. + + + n. + nn. + + + op. + opp. + + + p. + pp. + + + p. + pp. + + + para. + paras. + + + pt. + pts. + + + sec. + secs. + + + s.v. + s.vv. + + + v. + vv. + + + vol. + vols. 
+ + + + + + ¶¶ + + + § + §§ + + + + + chair + chairs + + + compiler + compilers + + + contributor + contributors + + + curator + curators + + + executive producer + executive producers + + + guest + guests + + + host + hosts + + + narrator + narrators + + + organizer + organizers + + + performer + performers + + + producer + producers + + + writer + writers + + + series creator + series creators + + + director + directors + + + editor + editors + + + editor + editors + + + illustrator + illustrators + + + translator + translators + + + editor & translator + editors & translators + + + + + comp. + comps. + + + contrib. + contribs. + + + cur. + curs. + + + exec. prod. + exec. prods. + + + narr. + narrs. + + + org. + orgs. + + + perf. + perfs. + + + prod. + prods. + + + writ. + writs. + + + cre. + cres. + + + dir. + dirs. + + + ed. + eds. + + + ed. + eds. + + + ill. + ills. + + + tran. + trans. + + + ed. & tran. + eds. & trans. + + + + chaired by + compiled by + with + curated by + executive produced by + with guest + hosted by + narrated by + organized by + performed by + produced by + written by + created by + by + directed by + edited by + edited by + illustrated by + interview by + to + by + translated by + edited & translated by + + + comp. by + w. + cur. by + exec. prod. by + w. guest + hosted by + narr. by + org. by + perf. by + prod. by + writ. by + cre. by + dir. by + ed. by + ed. by + illus. by + trans. by + ed. & trans. by + + + January + February + March + April + May + June + July + August + September + October + November + December + + + Jan. + Feb. + Mar. + Apr. + May + Jun. + Jul. + Aug. + Sep. + Oct. + Nov. + Dec. + + + Spring + Summer + Autumn + Winter + + \ No newline at end of file diff --git a/tests/lotr.bib b/tests/data/lotr.bib similarity index 100% rename from tests/lotr.bib rename to tests/data/lotr.bib