From ae13e2ae41a3450cb7430daa3f82d271954ec6ef Mon Sep 17 00:00:00 2001
From: Denis Bezrukov <6227442+denbezrukov@users.noreply.github.com>
Date: Mon, 10 Jul 2023 23:06:22 +0300
Subject: [PATCH] feat(rome_css_parser): CSS lexer #4682

---
 Cargo.lock                                   |  19 +-
 Cargo.toml                                   |   4 +-
 crates/rome_css_parser/Cargo.toml            |  30 ++
 crates/rome_css_parser/LICENSE               |  21 +
 crates/rome_css_parser/README.md             |  36 ++
 crates/rome_css_parser/src/lexer/mod.rs      | 499 +++++++++++++++++++
 crates/rome_css_parser/src/lexer/tests.rs    | 233 +++++++++
 crates/rome_css_parser/src/lib.rs            |   4 +
 crates/rome_css_parser/src/prelude.rs        |   2 +
 crates/rome_css_syntax/Cargo.toml            |   5 +-
 crates/rome_css_syntax/src/generated/kind.rs |   1 +
 crates/rome_json_parser/src/lexer/mod.rs     |   2 +-
 xtask/codegen/src/css_kinds_src.rs           |   9 +-
 13 files changed, 859 insertions(+), 6 deletions(-)
 create mode 100644 crates/rome_css_parser/Cargo.toml
 create mode 100644 crates/rome_css_parser/LICENSE
 create mode 100644 crates/rome_css_parser/README.md
 create mode 100644 crates/rome_css_parser/src/lexer/mod.rs
 create mode 100644 crates/rome_css_parser/src/lexer/tests.rs
 create mode 100644 crates/rome_css_parser/src/lib.rs
 create mode 100644 crates/rome_css_parser/src/prelude.rs

diff --git a/Cargo.lock b/Cargo.lock
index c905ade6102..f35bfa0d43f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1746,9 +1746,26 @@ dependencies = [
  "rome_rowan",
 ]
 
+[[package]]
+name = "rome_css_parser"
+version = "0.0.1"
+dependencies = [
+ "insta",
+ "quickcheck",
+ "quickcheck_macros",
+ "rome_console",
+ "rome_css_syntax",
+ "rome_diagnostics",
+ "rome_js_unicode_table",
+ "rome_parser",
+ "rome_rowan",
+ "tests_macros",
+ "tracing",
+]
+
 [[package]]
 name = "rome_css_syntax"
-version = "0.0.0"
+version = "0.0.1"
 dependencies = [
  "rome_rowan",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 8c24f0b470b..e510bd3962f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -31,7 +31,6 @@ rome_aria_metadata          = { path = "./crates/rome_aria_metadata" }
 rome_cli                    = { path = "./crates/rome_cli" }
 rome_console                = { version = "0.0.1", path = "./crates/rome_console" }
 rome_control_flow           = { path = "./crates/rome_control_flow" }
-rome_css_syntax             = { path = "./crates/rome_css_syntax" }
 rome_deserialize            = { version = "0.0.0", path = "./crates/rome_deserialize" }
 rome_diagnostics            = { version = "0.0.1", path = "./crates/rome_diagnostics" }
 rome_diagnostics_categories = { version = "0.0.1", path = "./crates/rome_diagnostics_categories" }
@@ -52,6 +51,9 @@ rome_json_factory           = { version = "0.0.1", path = "./crates/rome_json_fa
 rome_json_formatter         = { path = "./crates/rome_json_formatter" }
 rome_json_parser            = { path = "./crates/rome_json_parser" }
 rome_json_syntax            = { version = "0.0.1", path = "./crates/rome_json_syntax" }
+rome_css_factory            = { path = "./crates/rome_css_factory" }
+rome_css_parser             = { path = "./crates/rome_css_parser" }
+rome_css_syntax             = { path = "./crates/rome_css_syntax" }
 rome_lsp                    = { path = "./crates/rome_lsp" }
 rome_markup                 = { version = "0.0.1", path = "./crates/rome_markup" }
 rome_migrate                = { path = "./crates/rome_migrate" }
diff --git a/crates/rome_css_parser/Cargo.toml b/crates/rome_css_parser/Cargo.toml
new file mode 100644
index 00000000000..3db3a96d5c8
--- /dev/null
+++ b/crates/rome_css_parser/Cargo.toml
@@ -0,0 +1,30 @@
+[package]
+authors.workspace    = true
+categories           = ["parser-implementations", "development-tools"]
+description          = "An extremely fast CSS parser"
+documentation        = "https://rustdocs.rome.tools/rome_css_parser/index.html"
+edition.workspace    = true
+homepage.workspace   = true
+license.workspace    = true
+name                 = "rome_css_parser"
+repository.workspace = true
+version              = "0.0.1"
+
+[dependencies]
+rome_console          = { workspace = true }
+rome_diagnostics      = { workspace = true }
+rome_js_unicode_table = { workspace = true }
+rome_css_syntax       = { workspace = true }
+rome_parser           = { workspace = true }
+rome_rowan            = { workspace = true }
+tracing               = { workspace = true }
+
+[dev-dependencies]
+insta             = { workspace = true }
+quickcheck        = "1.0.3"
+quickcheck_macros = "1.0.0"
+tests_macros      = { workspace = true }
+
+# cargo-workspaces metadata
+[package.metadata.workspaces]
+independent = true
diff --git a/crates/rome_css_parser/LICENSE b/crates/rome_css_parser/LICENSE
new file mode 100644
index 00000000000..0c74aa6a873
--- /dev/null
+++ b/crates/rome_css_parser/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) Rome Tools, Inc. and its affiliates.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/crates/rome_css_parser/README.md b/crates/rome_css_parser/README.md
new file mode 100644
index 00000000000..26047a3b77e
--- /dev/null
+++ b/crates/rome_css_parser/README.md
@@ -0,0 +1,36 @@
+<p align="center">
+	<picture>
+		<source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/rome/brand/main/PNG/logo_white_yellow_transparent.png" width="700">
+		<img alt="Rome's logo depicting an ancient Roman arch with the word Rome to its side" src="https://raw.githubusercontent.com/rome/brand/main/PNG/logo_transparent.png" width="700">
+	</picture>
+</p>
+
+<div align="center">
+
+[![MIT licensed][mit-badge]][mit-url]
+[![Discord chat][discord-badge]][discord-url]
+[![CI on main][ci-badge]][ci-url]
+[![npm version][npm-badge]][npm-url]
+[![VSCode version][vscode-badge]][vscode-url]
+[![cargo version][cargo-badge]][cargo-url]
+
+
+[mit-badge]: https://img.shields.io/badge/license-MIT-blue.svg?color=brightgreen
+[mit-url]: LICENSE
+[discord-badge]: https://img.shields.io/discord/678763474494423051?logo=discord&label=discord&color=brightgreen
+[discord-url]: https://discord.gg/rome
+[ci-badge]: https://github.com/rome/tools/actions/workflows/main.yml/badge.svg
+[ci-url]: https://github.com/rome/tools/actions/workflows/main.yml
+[npm-badge]: https://img.shields.io/npm/v/rome/latest?color=brightgreen
+[npm-url]: https://www.npmjs.com/package/rome/v/latest
+[vscode-badge]: https://img.shields.io/visual-studio-marketplace/v/rome.rome?color=brightgreen&label=vscode
+[vscode-url]: https://marketplace.visualstudio.com/items?itemName=rome.rome
+[cargo-badge]: https://img.shields.io/crates/v/rome_css_parser?&color=brightgreen
+[cargo-url]: https://crates.io/crates/rome_css_parser
+
+</div>
+
+# `rome_css_parser`
+
+Rome's CSS parser implementation. Follow the [documentation](https://rustdocs.rome.tools/rome_css_parser/index.html).
+
diff --git a/crates/rome_css_parser/src/lexer/mod.rs b/crates/rome_css_parser/src/lexer/mod.rs
new file mode 100644
index 00000000000..1ad4efe6c8f
--- /dev/null
+++ b/crates/rome_css_parser/src/lexer/mod.rs
@@ -0,0 +1,499 @@
+//! An extremely fast, lookup table based, CSS lexer which yields SyntaxKind tokens used by the rome-css parser.
+#![allow(dead_code)]
+
+#[rustfmt::skip]
+mod tests;
+
+use rome_css_syntax::{CssSyntaxKind, CssSyntaxKind::*, TextLen, TextRange, TextSize, T};
+use rome_js_unicode_table::{lookup_byte, Dispatch::*};
+use rome_parser::diagnostic::ParseDiagnostic;
+use std::char::REPLACEMENT_CHARACTER;
+use std::iter::FusedIterator;
+
+pub struct Token {
+    kind: CssSyntaxKind,
+    range: TextRange,
+}
+
+impl Token {
+    pub fn kind(&self) -> CssSyntaxKind {
+        self.kind
+    }
+
+    pub fn range(&self) -> TextRange {
+        self.range
+    }
+}
+
+/// An extremely fast, lookup table based, lossless CSS lexer
+#[derive(Debug)]
+pub(crate) struct Lexer<'src> {
+    /// Source text
+    source: &'src str,
+
+    /// The start byte position in the source text of the next token.
+    position: usize,
+
+    diagnostics: Vec<ParseDiagnostic>,
+}
+
+impl<'src> Lexer<'src> {
+    /// Make a new lexer from a str, this is safe because strs are valid utf8
+    pub fn from_str(source: &'src str) -> Self {
+        Self {
+            source,
+            position: 0,
+            diagnostics: vec![],
+        }
+    }
+
+    /// Returns the source code
+    pub fn source(&self) -> &'src str {
+        self.source
+    }
+
+    pub fn finish(self) -> Vec<ParseDiagnostic> {
+        self.diagnostics
+    }
+
+    /// Lexes the next token.
+    ///
+    /// ## Return
+    /// Returns the next token, a single zero-length `EOF` token at the end, and `None` after that.
+    pub(crate) fn next_token(&mut self) -> Option<Token> {
+        let start = self.text_position();
+
+        match self.current_byte() {
+            Some(current) => {
+                let kind = self.lex_token(current);
+
+                debug_assert!(start < self.text_position(), "Lexer did not progress");
+                Some(Token {
+                    kind,
+                    range: TextRange::new(start, self.text_position()),
+                })
+            }
+            None if self.position == self.source.len() => {
+                self.advance(1); // move past the end so that later calls fall through to `None`
+                Some(Token {
+                    kind: EOF,
+                    range: TextRange::new(start, start),
+                })
+            }
+            None => None,
+        }
+    }
+
+    fn text_position(&self) -> TextSize {
+        TextSize::try_from(self.position).expect("Input to be smaller than 4 GB")
+    }
+
+    /// Bumps the current byte and creates a lexed token of the passed in kind
+    fn eat_byte(&mut self, tok: CssSyntaxKind) -> CssSyntaxKind {
+        self.advance(1);
+        tok
+    }
+
+    /// Consumes one line break (`\n`, `\r` or `\r\n`) and returns whether one was consumed.
+    ///
+    /// ## Safety
+    /// Must be called at a valid UTF8 char boundary
+    fn consume_newline(&mut self) -> bool {
+        self.assert_at_char_boundary();
+
+        match self.current_byte() {
+            Some(b'\n') => {
+                self.advance(1);
+                true
+            }
+            Some(b'\r') => {
+                if self.peek_byte() == Some(b'\n') {
+                    self.advance(2)
+                } else {
+                    self.advance(1)
+                }
+                true
+            }
+
+            _ => false,
+        }
+    }
+
+    /// Consumes tabs and spaces up to a newline or non-whitespace; other whitespace is reported.
+    ///
+    /// ## Safety
+    /// Must be called at a valid UTF8 char boundary
+    fn consume_whitespaces(&mut self) {
+        self.assert_at_char_boundary();
+
+        while let Some(byte) = self.current_byte() {
+            let dispatch = lookup_byte(byte);
+
+            match dispatch {
+                WHS => match byte {
+                    b'\t' | b' ' => self.advance(1),
+                    b'\r' | b'\n' => {
+                        break;
+                    }
+                    _ => {
+                        let start = self.text_position();
+                        self.advance(1);
+
+                        self.diagnostics.push(
+                            ParseDiagnostic::new(
+                                "The CSS standard only allows tabs, whitespace, carriage return and line feed whitespace.",
+                                start..self.text_position(),
+                            )
+                            .hint("Use a regular whitespace character instead."),
+                        )
+                    }
+                },
+
+                _ => break,
+            }
+        }
+    }
+
+    /// Consumes one newline (`NEWLINE`), or else whitespace up to the next newline (`WHITESPACE`).
+    ///
+    /// ## Safety
+    /// Must be called at a valid UTF8 char boundary
+    fn consume_newline_or_whitespaces(&mut self) -> CssSyntaxKind {
+        if self.consume_newline() {
+            NEWLINE
+        } else {
+            self.consume_whitespaces();
+            WHITESPACE
+        }
+    }
+
+    /// Get the UTF8 char which starts at the current byte
+    ///
+    /// ## Safety
+    /// Must be called at a valid UTF8 char boundary and before the end of the source
+    fn current_char_unchecked(&self) -> char {
+        // Precautionary measure for making sure the unsafe code below does not read over memory boundary
+        debug_assert!(!self.is_eof());
+        self.assert_at_char_boundary();
+
+        // Safety: We know this is safe because we require the input to the lexer to be valid utf8 and we always call this when we are at a char
+        let string = unsafe {
+            std::str::from_utf8_unchecked(self.source.as_bytes().get_unchecked(self.position..))
+        };
+        let chr = if let Some(chr) = string.chars().next() {
+            chr
+        } else {
+            // Safety: we always call this when we are at a valid char, so this branch is completely unreachable
+            unsafe {
+                core::hint::unreachable_unchecked();
+            }
+        };
+
+        chr
+    }
+
+    /// Gets the current byte.
+    ///
+    /// ## Returns
+    /// The current byte if the lexer isn't at the end of the file.
+    #[inline]
+    fn current_byte(&self) -> Option<u8> {
+        if self.is_eof() {
+            None
+        } else {
+            Some(self.source.as_bytes()[self.position])
+        }
+    }
+
+    /// Asserts that the lexer is at a UTF8 char boundary
+    #[inline]
+    fn assert_at_char_boundary(&self) {
+        debug_assert!(self.source.is_char_boundary(self.position));
+    }
+
+    /// Peeks at the next byte
+    #[inline]
+    fn peek_byte(&self) -> Option<u8> {
+        self.byte_at(1)
+    }
+
+    /// Returns the byte at position `self.position + offset` or `None` if it is out of bounds.
+    #[inline]
+    fn byte_at(&self, offset: usize) -> Option<u8> {
+        self.source.as_bytes().get(self.position + offset).copied()
+    }
+
+    /// Advances the current position by `n` bytes.
+    #[inline]
+    fn advance(&mut self, n: usize) {
+        self.position += n;
+    }
+
+    #[inline]
+    fn advance_byte_or_char(&mut self, chr: u8) {
+        if chr.is_ascii() {
+            self.advance(1);
+        } else {
+            self.advance_char_unchecked();
+        }
+    }
+
+    /// Advances the current position by the UTF8 length of the char starting at the current byte
+    ///
+    /// ## Safety
+    /// Must be called at a valid UTF8 char boundary
+    #[inline]
+    fn advance_char_unchecked(&mut self) {
+        let c = self.current_char_unchecked();
+        self.position += c.len_utf8();
+    }
+
+    /// Returns `true` if the lexer is at or past the end of the file.
+    #[inline]
+    fn is_eof(&self) -> bool {
+        self.position >= self.source.len()
+    }
+
+    /// Lexes the next token
+    ///
+    /// Guaranteed to not be at the end of the file
+    // A lookup table of `byte -> fn(l: &mut Lexer) -> Token` is exponentially slower than this approach
+    fn lex_token(&mut self, current: u8) -> CssSyntaxKind {
+        // The speed difference comes from the difference in table size, a 2kb table is easily fit into cpu cache
+        // While a 16kb table will be ejected from cache very often leading to slowdowns, this also allows LLVM
+        // to do more aggressive optimizations on the match regarding how to map it to instructions
+        let dispatched = lookup_byte(current);
+
+        match dispatched {
+            WHS => self.consume_newline_or_whitespaces(),
+            QOT => self.lex_string_literal(current),
+            SLH => self.lex_slash(),
+
+            PRD => self.eat_byte(T![.]),
+            MUL => self.eat_byte(T![*]),
+            COL => self.eat_byte(T![:]),
+            AT_ => self.eat_byte(T![@]),
+            HAS => self.eat_byte(T![#]),
+            PNO => self.eat_byte(T!['(']),
+            PNC => self.eat_byte(T![')']),
+            BEO => self.eat_byte(T!['{']),
+            BEC => self.eat_byte(T!['}']),
+            BTO => self.eat_byte(T!('[')),
+            BTC => self.eat_byte(T![']']),
+
+            _ => self.eat_unexpected_character(),
+        }
+    }
+
+    /// Lexes a string literal delimited by `quote`, the byte the lexer is currently at.
+    /// Returns `ERROR_TOKEN` for unterminated strings and for invalid escape sequences.
+    fn lex_string_literal(&mut self, quote: u8) -> CssSyntaxKind {
+        self.assert_at_char_boundary();
+        let start = self.text_position();
+
+        self.advance(1); // Skip over the quote
+        let mut state = LexStringState::InString;
+
+        while let Some(chr) = self.current_byte() {
+            let dispatch = lookup_byte(chr);
+
+            match dispatch {
+                QOT if quote == chr => {
+                    self.advance(1);
+                    if matches!(state, LexStringState::InString) {
+                        state = LexStringState::Terminated;
+                    }
+                    break;
+                }
+                // An escape sequence such as `\'`, `\41` or an escaped line break.
+                BSL => {
+                    let escape_start = self.text_position();
+                    self.advance(1);
+
+                    match self.current_byte() {
+                        // An escaped line break continues the string on the next line.
+                        // `\r\n` is a single line break, so both of its bytes are skipped.
+                        Some(b'\n' | b'\r') => {
+                            self.consume_newline();
+                        }
+
+                        // Handle an escaped quote, but only if it matches the opening quote.
+                        Some(c @ (b'\'' | b'"')) if c == quote => {
+                            self.advance(1);
+                        }
+
+                        Some(c) if c.is_ascii_hexdigit() => {
+                            // SAFETY: We know that the current byte is a hex digit.
+                            let mut hex = (c as char).to_digit(16).unwrap();
+                            self.advance(1);
+
+                            // Consume as many hex digits as possible, but no more than 5.
+                            // Note that this means 1-6 hex digits have been consumed in total.
+                            for _ in 0..5 {
+                                let Some(digit) = self.current_byte()
+                                    .and_then(|c| (c as char).to_digit(16)) else { break; };
+                                self.advance(1);
+
+                                hex = hex * 16 + digit;
+                            }
+
+                            // Interpret the hex digits as a hexadecimal number. If this number is zero, or
+                            // is for a surrogate, or is greater than the maximum allowed code point, return
+                            // U+FFFD REPLACEMENT CHARACTER (�).
+                            let hex = match hex {
+                                // If this number is zero
+                                0 => REPLACEMENT_CHARACTER,
+                                // or is for a surrogate
+                                55296..=57343 => REPLACEMENT_CHARACTER,
+                                // or is greater than the maximum allowed code point
+                                1114112.. => REPLACEMENT_CHARACTER,
+                                _ => char::from_u32(hex).unwrap_or(REPLACEMENT_CHARACTER),
+                            };
+
+                            if hex == REPLACEMENT_CHARACTER {
+                                state = LexStringState::InvalidEscapeSequence;
+
+                                let diagnostic = ParseDiagnostic::new(
+                                    "Invalid escape sequence",
+                                    escape_start..self.text_position(),
+                                );
+                                self.diagnostics.push(diagnostic);
+                            }
+                        }
+
+                        Some(chr) => {
+                            self.advance_byte_or_char(chr);
+                        }
+
+                        None => {}
+                    }
+                }
+                WHS if matches!(chr, b'\n' | b'\r') => {
+                    let unterminated =
+                        ParseDiagnostic::new("Missing closing quote", start..self.text_position())
+                            .detail(self.position..self.position + 1, "line breaks here");
+
+                    self.diagnostics.push(unterminated);
+
+                    return ERROR_TOKEN;
+                }
+                UNI => self.advance_char_unchecked(),
+
+                _ => self.advance(1),
+            }
+        }
+
+        match state {
+            LexStringState::Terminated => CSS_STRING_LITERAL,
+            LexStringState::InString => {
+                let unterminated =
+                    ParseDiagnostic::new("Missing closing quote", start..self.text_position())
+                        .detail(
+                            self.source.text_len()..self.source.text_len(),
+                            "file ends here",
+                        );
+                self.diagnostics.push(unterminated);
+
+                ERROR_TOKEN
+            }
+            LexStringState::InvalidEscapeSequence => ERROR_TOKEN,
+        }
+    }
+
+    /// Lexes a `/* block */` or `// line` comment; any other `/` is an unexpected character.
+    fn lex_slash(&mut self) -> CssSyntaxKind {
+        let start = self.text_position();
+        match self.peek_byte() {
+            Some(b'*') => {
+                // eat `/*`
+                self.advance(2);
+
+                let mut has_newline = false;
+
+                while let Some(chr) = self.current_byte() {
+                    match chr {
+                        b'*' if self.peek_byte() == Some(b'/') => {
+                            self.advance(2);
+
+                            if has_newline {
+                                return MULTILINE_COMMENT;
+                            } else {
+                                return COMMENT;
+                            }
+                        }
+                        b'\n' | b'\r' => {
+                            has_newline = true;
+                            self.advance(1)
+                        }
+                        chr => self.advance_byte_or_char(chr),
+                    }
+                }
+                // Only reachable at EOF: the detail span must not point past the source.
+                let err =
+                    ParseDiagnostic::new("Unterminated block comment", start..self.text_position())
+                        .detail(
+                            self.source.text_len()..self.source.text_len(),
+                            "... but the file ends here",
+                        );
+
+                self.diagnostics.push(err);
+
+                if has_newline {
+                    MULTILINE_COMMENT
+                } else {
+                    COMMENT
+                }
+            }
+            Some(b'/') => {
+                self.advance(2); // eat `//`
+
+                while let Some(chr) = self.current_byte() {
+                    match chr {
+                        b'\n' | b'\r' => return COMMENT, // the line break is its own token
+                        chr => self.advance_byte_or_char(chr),
+                    }
+                }
+
+                COMMENT
+            }
+            _ => self.eat_unexpected_character(),
+        }
+    }
+
+    #[inline]
+    fn eat_unexpected_character(&mut self) -> CssSyntaxKind {
+        self.assert_at_char_boundary();
+
+        let char = self.current_char_unchecked();
+        let err = ParseDiagnostic::new(
+            format!("unexpected character `{}`", char),
+            self.text_position()..self.text_position() + char.text_len(),
+        );
+        self.diagnostics.push(err);
+        self.advance(char.len_utf8());
+
+        ERROR_TOKEN
+    }
+}
+
+impl Iterator for Lexer<'_> {
+    type Item = Token;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.next_token()
+    }
+}
+
+impl FusedIterator for Lexer<'_> {}
+
+#[derive(Copy, Clone, Debug)]
+enum LexStringState {
+    /// String that contains an invalid escape sequence
+    InvalidEscapeSequence,
+
+    /// Between the opening `"` and closing `"` quotes.
+    InString,
+
+    /// Properly terminated string
+    Terminated,
+}
diff --git a/crates/rome_css_parser/src/lexer/tests.rs b/crates/rome_css_parser/src/lexer/tests.rs
new file mode 100644
index 00000000000..4bc08842f78
--- /dev/null
+++ b/crates/rome_css_parser/src/lexer/tests.rs
@@ -0,0 +1,233 @@
+#![cfg(test)]
+#![allow(unused_mut, unused_variables, unused_assignments)]
+
+use super::{Lexer, TextSize};
+use quickcheck_macros::quickcheck;
+use std::sync::mpsc::channel;
+use std::thread;
+use std::time::Duration;
+
+// Assert the result of lexing a piece of source code,
+// and make sure the tokens yielded are fully lossless and the source can be reconstructed from only the tokens
+macro_rules! assert_lex {
+    ($src:expr, $($kind:ident:$len:expr $(,)?)*) => {{
+        let mut lexer = Lexer::from_str($src);
+        let mut idx = 0;
+        let mut tok_idx = TextSize::default();
+
+        let mut new_str = String::with_capacity($src.len());
+        let tokens: Vec<_> = lexer.collect();
+
+        $(
+            assert_eq!(
+                tokens[idx].kind,
+                rome_css_syntax::CssSyntaxKind::$kind,
+                "expected token kind {}, but found {:?}",
+                stringify!($kind),
+                tokens[idx].kind,
+            );
+
+            assert_eq!(
+                tokens[idx].range.len(),
+                TextSize::from($len),
+                "expected token length of {}, but found {:?} for token {:?}",
+                $len,
+                tokens[idx].range.len(),
+                tokens[idx].kind,
+            );
+
+            new_str.push_str(&$src[tokens[idx].range]);
+            tok_idx += tokens[idx].range.len();
+
+            idx += 1;
+        )*
+
+        if idx < tokens.len() {
+            panic!(
+                "expected {} tokens but lexer returned {}, first unexpected token is '{:?}'",
+                idx,
+                tokens.len(),
+                tokens[idx].kind
+            );
+        } else {
+            assert_eq!(idx, tokens.len());
+        }
+
+        assert_eq!($src, new_str, "Failed to reconstruct input");
+    }};
+}
+
+// This is for testing if the lexer is truly lossless
+// It parses random strings and puts them back together with the produced tokens and compares
+#[quickcheck]
+fn losslessness(string: String) -> bool {
+    // using an mpsc channel allows us to spawn a thread and spawn the lexer there, then if
+    // it takes more than 2 seconds we panic because it is 100% infinite recursion
+    let cloned = string.clone();
+    let (sender, receiver) = channel();
+    thread::spawn(move || {
+        let mut lexer = Lexer::from_str(&cloned);
+        let tokens: Vec<_> = lexer.map(|token| token.range).collect();
+
+        sender
+            .send(tokens)
+            .expect("Could not send tokens to receiver");
+    });
+    let token_ranges = receiver
+        .recv_timeout(Duration::from_secs(2))
+        .unwrap_or_else(|_| {
+            panic!(
+                "Lexer is infinitely recursing with this code: ->{}<-",
+                string
+            )
+        });
+
+    let mut new_str = String::with_capacity(string.len());
+    let mut idx = TextSize::from(0);
+
+    for range in token_ranges {
+        new_str.push_str(&string[range]);
+        idx += range.len();
+    }
+
+    string == new_str
+}
+
+#[test]
+fn empty() {
+    assert_lex! {
+        "",
+        EOF:0
+    }
+}
+
+#[test]
+fn string() {
+    assert_lex! {
+        "'5098382'",
+        CSS_STRING_LITERAL:9,
+        EOF:0
+    }
+
+    // double quote
+    assert_lex! {
+        r#"'hel"lo"'"#,
+        CSS_STRING_LITERAL:9,
+        EOF:0
+    }
+
+    // escaped quote
+    assert_lex! {
+        r#"'hel\'lo\''"#,
+        CSS_STRING_LITERAL:11,
+        EOF:0
+    }
+
+    // escaped quote
+    assert_lex! {
+        r#""hel\"lo\"""#,
+        CSS_STRING_LITERAL:11,
+        EOF:0
+    }
+
+    // unicode
+    assert_lex! {
+        "'юникод'",
+        CSS_STRING_LITERAL:14,
+        EOF:0
+    }
+
+    // missing single closing quote
+    assert_lex! {
+        "'he",
+        ERROR_TOKEN:3,
+        EOF:0
+    }
+
+    // missing double closing quote
+    assert_lex! {
+        r#""he"#,
+        ERROR_TOKEN:3,
+        EOF:0
+    }
+
+    // line break
+    assert_lex! {
+        r#"'he
+    "#,
+        ERROR_TOKEN:3,
+        NEWLINE:1,
+        WHITESPACE:4,
+        EOF:0
+    }
+
+    // line break
+    assert_lex! {
+        r#"'he
+    '"#,
+        ERROR_TOKEN:3,
+        NEWLINE:1,
+        WHITESPACE:4,
+        ERROR_TOKEN:1,
+        EOF:0
+    }
+
+    assert_lex! {
+        r#""Escaped \n""#,
+        CSS_STRING_LITERAL:12,
+        EOF:0
+    }
+
+    assert_lex! {
+        r#""Escaped \r""#,
+        CSS_STRING_LITERAL:12,
+        EOF:0
+    }
+
+    // invalid escape sequence
+    assert_lex! {
+        r#"'\0'"#,
+        ERROR_TOKEN:4,
+        EOF:0
+    }
+}
+
+#[test]
+fn single_line_comments() {
+    assert_lex! {
+        "//abc
+    ",
+        COMMENT:5,
+        NEWLINE:1,
+        WHITESPACE:4,
+        EOF:0
+    }
+
+    assert_lex! {
+        "//a",
+        COMMENT:3,
+        EOF:0
+    }
+}
+
+#[test]
+fn block_comment() {
+    assert_lex! {
+        "/*
+        */",
+        MULTILINE_COMMENT:13,
+        EOF:0
+    }
+
+    assert_lex! {
+        "/* */",
+        COMMENT:5,
+        EOF:0
+    }
+
+    assert_lex! {
+        "/* *",
+        COMMENT:4,
+        EOF:0
+    }
+}
diff --git a/crates/rome_css_parser/src/lib.rs b/crates/rome_css_parser/src/lib.rs
new file mode 100644
index 00000000000..027ef38d7e4
--- /dev/null
+++ b/crates/rome_css_parser/src/lib.rs
@@ -0,0 +1,4 @@
+//! Extremely fast, lossless, and error tolerant CSS Parser.
+
+mod lexer;
+mod prelude;
diff --git a/crates/rome_css_parser/src/prelude.rs b/crates/rome_css_parser/src/prelude.rs
new file mode 100644
index 00000000000..bd22b87c894
--- /dev/null
+++ b/crates/rome_css_parser/src/prelude.rs
@@ -0,0 +1,2 @@
+pub use rome_css_syntax::T;
+pub use rome_parser::prelude::*;
diff --git a/crates/rome_css_syntax/Cargo.toml b/crates/rome_css_syntax/Cargo.toml
index 8f6d869dab7..770a2a9e4b7 100644
--- a/crates/rome_css_syntax/Cargo.toml
+++ b/crates/rome_css_syntax/Cargo.toml
@@ -1,11 +1,12 @@
 [package]
 authors.workspace    = true
+description          = "SyntaxKind and common rowan definitions for rome_css_parser"
+documentation        = "https://rustdocs.rome.tools/rome_css_syntax/index.html"
 edition.workspace    = true
 license.workspace    = true
 name                 = "rome_css_syntax"
 repository.workspace = true
-version              = "0.0.0"
-
+version              = "0.0.1"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
diff --git a/crates/rome_css_syntax/src/generated/kind.rs b/crates/rome_css_syntax/src/generated/kind.rs
index 7acafe73281..ca29377b187 100644
--- a/crates/rome_css_syntax/src/generated/kind.rs
+++ b/crates/rome_css_syntax/src/generated/kind.rs
@@ -208,6 +208,7 @@ pub enum CssSyntaxKind {
     NEWLINE,
     WHITESPACE,
     COMMENT,
+    MULTILINE_COMMENT,
     CSS_ROOT,
     CSS_ID_SELECTOR_PATTERN,
     CSS_RULE,
diff --git a/crates/rome_json_parser/src/lexer/mod.rs b/crates/rome_json_parser/src/lexer/mod.rs
index 25fbbaaead3..95cc27c5362 100644
--- a/crates/rome_json_parser/src/lexer/mod.rs
+++ b/crates/rome_json_parser/src/lexer/mod.rs
@@ -24,7 +24,7 @@ impl Token {
     }
 }
 
-/// An extremely fast, lookup table based, lossless ECMAScript lexer
+/// An extremely fast, lookup table based, lossless JSON lexer
 #[derive(Debug)]
 pub(crate) struct Lexer<'src> {
     /// Source text
diff --git a/xtask/codegen/src/css_kinds_src.rs b/xtask/codegen/src/css_kinds_src.rs
index 7c2d3e2962d..43003f298c6 100644
--- a/xtask/codegen/src/css_kinds_src.rs
+++ b/xtask/codegen/src/css_kinds_src.rs
@@ -200,7 +200,14 @@ pub const CSS_KINDS_SRC: KindsSrc = KindsSrc {
         "CSS_CUSTOM_PROPERTY",
         "CSS_SPACE_LITERAL",
     ],
-    tokens: &["ERROR_TOKEN", "IDENT", "NEWLINE", "WHITESPACE", "COMMENT"],
+    tokens: &[
+        "ERROR_TOKEN",
+        "IDENT",
+        "NEWLINE",
+        "WHITESPACE",
+        "COMMENT",
+        "MULTILINE_COMMENT",
+    ],
     nodes: &[
         "CSS_ROOT",
         "CSS_ID_SELECTOR_PATTERN",