Merge pull request #851 from CBenoit/master

Add include by anchor in preprocessor (partial include)
rust-lang · Jul 15, 2019 · e7c3d02 · e7c3d02
2 parents d6088c8 + d8a68ba
commit e7c3d02
Show file tree

Hide file tree

Showing 4 changed files with 163 additions and 8 deletions.
diff --git a/book-example/src/format/mdbook.md b/book-example/src/format/mdbook.md
@@ -63,6 +63,50 @@ the file are omitted. The third command includes all lines from line 2, i.e. the
 first line is omitted. The last command includes the excerpt of `file.rs`
 consisting of lines 2 to 10.
 
+To avoid breaking your book when modifying included files, you can also
+include a specific section using anchors instead of line numbers.
+An anchor is a pair of matching lines. The line beginning an anchor must
+contain a match for the regex `ANCHOR:\s*[\w_-]+`, and similarly the ending
+line must contain a match for the regex `ANCHOR_END:\s*[\w_-]+`. This allows
+you to put anchors in any kind of commented line.
+
+Consider the following file to include:
+```rs
+/* ANCHOR: all */
+
+// ANCHOR: component
+struct Paddle {
+    hello: f32,
+}
+// ANCHOR_END: component
+
+////////// ANCHOR: system
+impl System for MySystem { ... }
+////////// ANCHOR_END: system
+
+/* ANCHOR_END: all */
+```
+
+Then in the book, all you have to do is:
+````hbs
+Here is a component:
+```rust,no_run,noplaypen
+\{{#include file.rs:component}}
+```
+
+Here is a system:
+```rust,no_run,noplaypen
+\{{#include file.rs:system}}
+```
+
+This is the full file.
+```rust,no_run,noplaypen
+\{{#include file.rs:all}}
+```
+````
+
+Lines containing anchor patterns inside the included section are omitted from the output, so the markers of nested anchors never show up in your book.
+
 ## Inserting runnable Rust files
 
 With the following syntax, you can insert runnable Rust files into your book:

diff --git a/src/preprocess/links.rs b/src/preprocess/links.rs
@@ -1,5 +1,5 @@
 use crate::errors::*;
-use crate::utils::take_lines;
+use crate::utils::{take_anchored_lines, take_lines};
 use regex::{CaptureMatches, Captures, Regex};
 use std::fs;
 use std::ops::{Range, RangeFrom, RangeFull, RangeTo};
@@ -106,6 +106,7 @@ enum LinkType<'a> {
     IncludeRangeFrom(PathBuf, RangeFrom<usize>),
     IncludeRangeTo(PathBuf, RangeTo<usize>),
     IncludeRangeFull(PathBuf, RangeFull),
+    IncludeAnchor(PathBuf, String),
     Playpen(PathBuf, Vec<&'a str>),
 }
 
@@ -118,6 +119,7 @@ impl<'a> LinkType<'a> {
             LinkType::IncludeRangeFrom(p, _) => Some(return_relative_path(base, &p)),
             LinkType::IncludeRangeTo(p, _) => Some(return_relative_path(base, &p)),
             LinkType::IncludeRangeFull(p, _) => Some(return_relative_path(base, &p)),
+            LinkType::IncludeAnchor(p, _) => Some(return_relative_path(base, &p)),
             LinkType::Playpen(p, _) => Some(return_relative_path(base, &p)),
         }
     }
@@ -133,11 +135,21 @@ fn return_relative_path<P: AsRef<Path>>(base: P, relative: P) -> PathBuf {
 fn parse_include_path(path: &str) -> LinkType<'static> {
     let mut parts = path.split(':');
     let path = parts.next().unwrap().into();
-    // subtract 1 since line numbers usually begin with 1
-    let start = parts
-        .next()
-        .and_then(|s| s.parse::<usize>().ok())
-        .map(|val| val.saturating_sub(1));
+
+    let next_element = parts.next();
+    let start = if let Some(value) = next_element.and_then(|s| s.parse::<usize>().ok()) {
+        // subtract 1 since line numbers usually begin with 1
+        Some(value.saturating_sub(1))
+    } else if let Some(anchor) = next_element {
+        if anchor == "" {
+            None
+        } else {
+            return LinkType::IncludeAnchor(path, String::from(anchor));
+        }
+    } else {
+        None
+    };
+
     let end = parts.next();
     let has_end = end.is_some();
     let end = end.and_then(|s| s.parse::<usize>().ok());
@@ -258,6 +270,19 @@ impl<'a> Link<'a> {
                     )
                 })
             }
+            LinkType::IncludeAnchor(ref pat, ref anchor) => {
+                let target = base.join(pat);
+
+                fs::read_to_string(&target)
+                    .map(|s| take_anchored_lines(&s, anchor))
+                    .chain_err(|| {
+                        format!(
+                            "Could not read file for link {} ({})",
+                            self.link_text,
+                            target.display(),
+                        )
+                    })
+            }
             LinkType::Playpen(ref pat, ref attrs) => {
                 let target = base.join(pat);
 
@@ -482,6 +507,25 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_find_links_with_anchor() { // a non-numeric part after `:` must be parsed as an anchor name
+        let s = "Some random text with {{#include file.rs:anchor}}...";
+        let res = find_links(s).collect::<Vec<_>>();
+        println!("\nOUTPUT: {:?}\n", res);
+        assert_eq!(
+            res,
+            vec![Link {
+                start_index: 22, // offset of the opening `{{` in `s`
+                end_index: 49, // offset just past the closing `}}` (22 + 27 characters of link text)
+                link_type: LinkType::IncludeAnchor(
+                    PathBuf::from("file.rs"),
+                    String::from("anchor")
+                ),
+                link_text: "{{#include file.rs:anchor}}",
+            }]
+        );
+    }
+
     #[test]
     fn test_find_links_escaped_link() {
         let s = "Some random text with escaped playpen \\{{#playpen file.rs editable}} ...";

diff --git a/src/utils/mod.rs b/src/utils/mod.rs
@@ -11,7 +11,7 @@ use std::borrow::Cow;
 use std::fmt::Write;
 use std::path::Path;
 
-pub use self::string::take_lines;
+pub use self::string::{take_anchored_lines, take_lines};
 
 /// Replaces multiple consecutive whitespace characters with a single space character.
 pub fn collapse_whitespace(text: &str) -> Cow<'_, str> {

diff --git a/src/utils/string.rs b/src/utils/string.rs
@@ -1,4 +1,5 @@
 use itertools::Itertools;
+use regex::Regex;
 use std::ops::Bound::{Excluded, Included, Unbounded};
 use std::ops::RangeBounds;
 
@@ -17,9 +18,46 @@ pub fn take_lines<R: RangeBounds<usize>>(s: &str, range: R) -> String {
     }
 }
 
+/// Take the lines of `s` between the `ANCHOR: <anchor>` and `ANCHOR_END: <anchor>` marker lines.
+/// Marker lines of any anchor are left out; a missing end marker means "until the end of `s`".
+pub fn take_anchored_lines(s: &str, anchor: &str) -> String {
+    lazy_static! {
+        static ref RE_START: Regex = Regex::new(r"ANCHOR:\s*(?P<anchor_name>[\w_-]+)").unwrap();
+        static ref RE_END: Regex = Regex::new(r"ANCHOR_END:\s*(?P<anchor_name>[\w_-]+)").unwrap();
+    }
+
+    let mut retained = Vec::<&str>::new();
+    let mut anchor_found = false; // set once the start marker named `anchor` has been seen
+
+    for l in s.lines() {
+        if anchor_found {
+            match RE_END.captures(l) {
+                Some(cap) => { // end-marker lines are never retained, whatever their name
+                    if &cap["anchor_name"] == anchor {
+                        break; // end marker of the requested anchor: stop collecting
+                    }
+                }
+                None => {
+                    if !RE_START.is_match(l) { // drop start markers nested inside the section
+                        retained.push(l);
+                    }
+                }
+            }
+        } else {
+            if let Some(cap) = RE_START.captures(l) { // still searching for the requested start marker
+                if &cap["anchor_name"] == anchor {
+                    anchor_found = true;
+                }
+            }
+        }
+    }
+
+    retained.join("\n") // empty string if the anchor was never found; no trailing newline
+}
+
 #[cfg(test)]
 mod tests {
-    use super::take_lines;
+    use super::{take_anchored_lines, take_lines};
 
     #[test]
     fn take_lines_test() {
@@ -32,4 +70,33 @@ mod tests {
         assert_eq!(take_lines(s, 4..3), "");
         assert_eq!(take_lines(s, ..100), s);
     }
+
+    #[test]
+    fn take_anchored_lines_test() {
+        let s = "Lorem\nipsum\ndolor\nsit\namet";
+        assert_eq!(take_anchored_lines(s, "test"), ""); // no markers at all
+
+        let s = "Lorem\nipsum\ndolor\nANCHOR_END: test\nsit\namet";
+        assert_eq!(take_anchored_lines(s, "test"), ""); // an end marker without a start marker selects nothing
+
+        let s = "Lorem\nipsum\nANCHOR: test\ndolor\nsit\namet";
+        assert_eq!(take_anchored_lines(s, "test"), "dolor\nsit\namet"); // unclosed anchor runs to the end of the input
+        assert_eq!(take_anchored_lines(s, "something"), ""); // unknown anchor name
+
+        let s = "Lorem\nipsum\nANCHOR: test\ndolor\nsit\namet\nANCHOR_END: test\nlorem\nipsum";
+        assert_eq!(take_anchored_lines(s, "test"), "dolor\nsit\namet"); // properly closed anchor
+        assert_eq!(take_anchored_lines(s, "something"), "");
+
+        let s = "Lorem\nANCHOR: test\nipsum\nANCHOR: test\ndolor\nsit\namet\nANCHOR_END: test\nlorem\nipsum";
+        assert_eq!(take_anchored_lines(s, "test"), "ipsum\ndolor\nsit\namet"); // repeated start marker is dropped from the output
+        assert_eq!(take_anchored_lines(s, "something"), "");
+
+        let s = "Lorem\nANCHOR:    test2\nipsum\nANCHOR: test\ndolor\nsit\namet\nANCHOR_END: test\nlorem\nANCHOR_END:test2\nipsum";
+        assert_eq!(
+            take_anchored_lines(s, "test2"),
+            "ipsum\ndolor\nsit\namet\nlorem" // markers of the nested `test` anchor are stripped from the outer section
+        );
+        assert_eq!(take_anchored_lines(s, "test"), "dolor\nsit\namet"); // the nested anchor can be extracted on its own
+        assert_eq!(take_anchored_lines(s, "something"), "");
+    }
 }