diff --git a/crates/oxc_parser/examples/parser_tsx.rs b/crates/oxc_parser/examples/parser_tsx.rs new file mode 100644 index 0000000000000..4ba88838e19cb --- /dev/null +++ b/crates/oxc_parser/examples/parser_tsx.rs @@ -0,0 +1,40 @@ +use oxc_allocator::Allocator; +use oxc_parser::{Parser, ParserReturn}; +use oxc_span::SourceType; + +fn main() { + let source_text = r" +import React from 'react'; + +/** + * A simple counter component + */ +export const Counter: React.FC = () => { + const [count, setCount] = React.useState(0); + + return ( +
+

Count: {count}

+ + +
+ ) +}"; + + // Memory arena where AST nodes get stored + let allocator = Allocator::default(); + // Infers TypeScript + JSX + ESM modules + let source_type = SourceType::from_path("Counter.tsx").unwrap(); + + let ParserReturn { + program, // AST + errors, // Syntax errors + panicked, // Parser encountered an error it couldn't recover from + trivias, // Comments, whitespace, etc. + } = Parser::new(&allocator, source_text, source_type).parse(); + + assert!(!panicked); + assert!(errors.is_empty()); + assert!(!program.body.is_empty()); + assert_eq!(trivias.comments().count(), 1); +} diff --git a/crates/oxc_parser/src/lexer/kind.rs b/crates/oxc_parser/src/lexer/kind.rs index ecaa14248789c..4d8f239e9d55e 100644 --- a/crates/oxc_parser/src/lexer/kind.rs +++ b/crates/oxc_parser/src/lexer/kind.rs @@ -2,6 +2,9 @@ use std::fmt; +/// Lexer token kind +/// +/// Exported for other oxc crates to use. You generally don't need to use this directly. #[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] #[non_exhaustive] pub enum Kind { @@ -244,8 +247,11 @@ impl Kind { matches!(self, Ident) || self.is_all_keyword() } - /// Check the succeeding token of a `let` keyword - // let { a, b } = c, let [a, b] = c, let ident + /// Check the succeeding token of a `let` keyword. + /// + /// ```javascript + /// let { a, b } = c, let [a, b] = c, let ident + /// ``` pub fn is_after_let(self) -> bool { self != Self::In && (matches!(self, LCurly | LBrack | Ident) || self.is_all_keyword()) } diff --git a/crates/oxc_parser/src/lib.rs b/crates/oxc_parser/src/lib.rs index 8dd4cf90e455e..63fd7f575ad0c 100644 --- a/crates/oxc_parser/src/lib.rs +++ b/crates/oxc_parser/src/lib.rs @@ -1,26 +1,48 @@ //! Oxc Parser for JavaScript and TypeScript //! +//! Oxc's [`Parser`] has full support for +//! - The latest stable ECMAScript syntax +//! - TypeScript +//! - JSX and TSX +//! - [Stage 3 Decorators](https://github.com/tc39/proposal-decorator-metadata) +//! +//! # Usage +//! +//! 
The parser has a minimal API with three inputs (a [memory arena](oxc_allocator::Allocator), a +//! source string, and a [`SourceType`]) and one return struct (a [`ParserReturn`]). +//! +//! ```rust +//! let parser_return = Parser::new(&allocator, &source_text, source_type).parse(); +//! ``` +//! +//! # Abstract Syntax Tree (AST) +//! Oxc's AST is located in a separate [`oxc_ast`] crate. You can find type definitions for AST +//! nodes [here][`oxc_ast::ast`]. +//! //! # Performance //! //! The following optimization techniques are used: //! * AST is allocated in a memory arena ([bumpalo](https://docs.rs/bumpalo)) for fast AST drop -//! * [oxc_span::Span] offsets uses `u32` instead of `usize` +//! * [`oxc_span::Span`] offsets use `u32` instead of `usize` //! * Scope binding, symbol resolution and complicated syntax errors are not done in the parser, //! they are delegated to the [semantic analyzer](https://docs.rs/oxc_semantic) //! -//! # Usage +//! 
+//! Because [`oxc_span::Span`] uses `u32` instead of `usize`, Oxc can only parse files up +//! to 4 GiB in size. This shouldn't be a limitation in almost all cases. +//!
//! -//! The parser has a minimal API with three inputs and one return struct ([ParserReturn]). +//! # Examples +//! +//! //! //! ```rust -//! let parser_return = Parser::new(&allocator, &source_text, source_type).parse(); +#![doc = include_str!("../examples/parser.rs")] //! ``` //! -//! # Example -//! -//! +//! ### Parsing TSX //! ```rust -#![doc = include_str!("../examples/parser.rs")] +#![doc = include_str!("../examples/parser_tsx.rs")] //! ``` //! //! # Visitor @@ -91,39 +113,91 @@ pub const MAX_LEN: usize = if std::mem::size_of::() >= 8 { isize::MAX as usize }; -/// Return value of parser consisting of AST, errors and comments +/// Return value of [`Parser::parse`] consisting of AST, errors and comments +/// +/// ## AST Validity +/// +/// [`program`] will always contain a structurally valid AST, even if there are syntax errors. +/// However, the AST may be semantically invalid. To ensure a valid AST, +/// 1. Check that [`errors`] is empty +/// 2. Run semantic analysis with [syntax error checking +/// enabled](https://docs.rs/oxc_semantic/latest/oxc_semantic/struct.SemanticBuilder.html#method.with_check_syntax_error) +/// +/// ## Errors +/// Oxc's [`Parser`] is able to recover from some syntax errors and continue parsing. When this +/// happens, +/// 1. [`errors`] will be non-empty +/// 2. [`program`] will contain a full AST +/// 3. [`panicked`] will be false +/// +/// When the parser cannot recover, it will abort and terminate parsing early. [`program`] will +/// be empty and [`panicked`] will be `true`. /// -/// The parser always return a valid AST. -/// When `panicked = true`, then program will always be empty. -/// When `errors.len() > 0`, then program may or may not be empty due to error recovery. +/// [`program`]: ParserReturn::program +/// [`errors`]: ParserReturn::errors +/// [`panicked`]: ParserReturn::panicked pub struct ParserReturn<'a> { + /// The parsed AST. + /// + /// Will be empty (e.g. no statements, directives, etc) if the parser panicked. 
+ /// + /// ## Validity + /// It is possible for the AST to be present and semantically invalid. This will happen if + /// 1. The [`Parser`] encounters a recoverable syntax error + /// 2. The logic for checking the violation is in the semantic analyzer + /// + /// To ensure a valid AST, check that [`errors`](ParserReturn::errors) is empty. Then, run + /// semantic analysis with syntax error checking enabled. pub program: Program<'a>, + + /// Syntax errors encountered while parsing. + /// + /// This list is not comprehensive. Oxc offloads more-expensive checks to [semantic + /// analysis](https://docs.rs/oxc_semantic), which can be enabled using + /// [`SemanticBuilder::with_check_syntax_error`](https://docs.rs/oxc_semantic/latest/oxc_semantic/struct.SemanticBuilder.html#method.with_check_syntax_error). pub errors: Vec, + + /// Comments and whitespace pub trivias: Trivias, + + /// Whether the parser panicked and terminated early. + /// + /// This will be `false` if parsing was successful, or if parsing was able to recover from a + /// syntax error. When `true`, [`program`] will be empty and [`errors`] will contain at least + /// one error. + /// + /// [`program`]: ParserReturn::program + /// [`errors`]: ParserReturn::errors pub panicked: bool, } /// Parse options +/// +/// You may provide options to the [`Parser`] using [`Parser::with_options`]. #[derive(Debug, Clone, Copy)] pub struct ParseOptions { /// Whether to parse regular expressions or not. /// - /// Default: false + /// Default: `false` pub parse_regular_expression: bool, - /// Allow return outside of function + /// Allow [`return`] statements outside of functions. /// - /// By default, a return statement at the top level raises an error. - /// Set this to true to accept such code. + /// By default, a return statement at the top level raises an error (`false`). + /// Set this to `true` to accept such code. 
+ /// + /// [`return`]: oxc_ast::ast::ReturnStatement pub allow_return_outside_function: bool, - /// Emit `ParenthesizedExpression` in AST. + /// Emit [`ParenthesizedExpression`]s in AST. /// - /// If this option is true, parenthesized expressions are represented by - /// (non-standard) `ParenthesizedExpression` nodes that have a single `expression` property + /// If this option is `true`, parenthesized expressions are represented by + /// (non-standard) [`ParenthesizedExpression`] nodes that have a single `expression` property /// containing the expression inside parentheses. /// - /// Default: true + /// Default: `true` + /// + /// [`ParenthesizedExpression`]: oxc_ast::ast::ParenthesizedExpression pub preserve_parens: bool, } @@ -148,12 +222,18 @@ pub struct Parser<'a> { } impl<'a> Parser<'a> { - /// Create a new parser + /// Create a new [`Parser`] + /// + /// # Parameters + /// - `allocator`: [Memory arena](oxc_allocator::Allocator) for allocating AST nodes + /// - `source_text`: Source code to parse + /// - `source_type`: Source type (e.g. JavaScript, TypeScript, JSX, ESM Module, Script) pub fn new(allocator: &'a Allocator, source_text: &'a str, source_type: SourceType) -> Self { let options = ParseOptions::default(); Self { allocator, source_text, source_type, options } } + /// Set parse options #[must_use] pub fn with_options(mut self, options: ParseOptions) -> Self { self.options = options; @@ -200,6 +280,8 @@ mod parser_parse { /// /// Returns an empty `Program` on unrecoverable error, /// Recoverable errors are stored inside `errors`. + /// + /// See the [module-level documentation](crate) for examples and more information. pub fn parse(self) -> ParserReturn<'a> { let unique = UniquePromise::new(); let parser = ParserImpl::new( @@ -212,11 +294,25 @@ mod parser_parse { parser.parse() } - /// Parse `Expression` + /// Parse a single [`Expression`]. 
/// - /// # Errors + /// # Example /// - /// * Syntax Error + /// ```rust + /// use oxc_allocator::Allocator; + /// use oxc_ast::ast::Expression; + /// use oxc_parser::Parser; + /// use oxc_span::SourceType; + /// + /// let src = "1 + 2"; + /// let allocator = Allocator::new(); + /// let source_type = SourceType::default(); + /// + /// let expr: Expression<'_> = Parser::new(&allocator, src, source_type).parse_expression().unwrap(); + /// ``` + /// + /// # Errors + /// Returns `Err` with the collected diagnostics if the source code being parsed has syntax errors. pub fn parse_expression(self) -> std::result::Result, Vec> { let unique = UniquePromise::new(); let parser = ParserImpl::new(