Skip to content

Commit

Permalink
fix: fix the issue of misaligned comments after formatting (#40)
Browse files Browse the repository at this point in the history
  • Loading branch information
wugeer authored Aug 23, 2024
1 parent ed5f77c commit 78f24a6
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 3 deletions.
48 changes: 45 additions & 3 deletions src/formatter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,35 @@ impl<'a> Formatter<'a> {
}

/// Appends a line comment (`token`) to `query`, followed by a new line.
///
/// Before the comment text is written, the tail of `query` is adjusted:
///
/// * If the previous token's text contains a line break and the comment is
///   followed by whitespace and then a number, string, word or reserved
///   token, a new line is added first.
/// * Otherwise, if the token two positions back is a `,`, all trailing
///   whitespace is trimmed from `query` and replaced with a single space.
fn format_line_comment(&self, token: &Token<'_>, query: &mut String) {
    // Look ahead: whitespace directly after the comment, then a token kind
    // from the list below.
    let is_whitespace_followed_by_special_token = self
        .next_token(1)
        .map_or(false, |after| after.kind == TokenKind::Whitespace)
        && self.next_token(2).map_or(false, |upcoming| {
            matches!(
                upcoming.kind,
                TokenKind::Number
                    | TokenKind::String
                    | TokenKind::Word
                    | TokenKind::ReservedTopLevel
                    | TokenKind::ReservedTopLevelNoIndent
                    | TokenKind::ReservedNewline
                    | TokenKind::Reserved
            )
        });

    // Look behind: does the previous token's text contain a line break?
    let follows_line_break = self
        .previous_token(1)
        .map_or(false, |before| before.value.contains('\n'));

    if follows_line_break && is_whitespace_followed_by_special_token {
        self.add_new_line(query);
    } else if matches!(self.previous_token(2), Some(earlier) if earlier.value == ",") {
        // Comment comes two tokens after a comma: collapse whatever whitespace
        // has been emitted so far into a single space before the comment.
        self.trim_all_spaces_end(query);
        query.push(' ');
    }

    query.push_str(token.value);
    self.add_new_line(query);
}
Expand Down Expand Up @@ -126,7 +155,7 @@ impl<'a> Formatter<'a> {

// Take out the preceding space unless there was whitespace there in the original query
// or another opening parens or line comment
let previous_token = self.previous_token();
let previous_token = self.previous_token(1);
if previous_token.is_none()
|| !PRESERVE_WHITESPACE_FOR.contains(&previous_token.unwrap().kind)
{
Expand Down Expand Up @@ -222,6 +251,10 @@ impl<'a> Formatter<'a> {
query.truncate(query.trim_end_matches(|c| c == ' ' || c == '\t').len());
}

/// Strips every trailing whitespace character (line breaks included) from
/// `query` in place.
fn trim_all_spaces_end(&self, query: &mut String) {
    let kept_len = query.trim_end().len();
    query.truncate(kept_len);
}

fn indent_comment(&self, token: &str) -> String {
let mut combined = String::with_capacity(token.len() + 4);
for (i, line) in token.split('\n').enumerate() {
Expand Down Expand Up @@ -264,8 +297,17 @@ impl<'a> Formatter<'a> {
combined
}

fn previous_token(&self) -> Option<&Token<'_>> {
let index = self.index.checked_sub(1);
/// Returns the token `idx` positions before the current index, or `None`
/// when subtracting `idx` would underflow or no token exists at that index.
fn previous_token(&self, idx: usize) -> Option<&Token<'_>> {
    self.index
        .checked_sub(idx)
        .and_then(|position| self.tokens.get(position))
}

fn next_token(&self, idx: usize) -> Option<&Token<'_>> {
let index = self.index.checked_add(idx);
if let Some(index) = index {
self.tokens.get(index)
} else {
Expand Down
87 changes: 87 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1504,4 +1504,91 @@ mod tests {

assert_eq!(format(input, &QueryParams::None, options), expected);
}

// Checks formatting of a multi-statement SQL script that mixes `--` line
// comments on their own lines with `--` comments trailing comma-separated
// entries. The script is formatted with a 4-space indent and no query
// parameters, and the result must match `expected` exactly.
#[test]
fn it_handles_comments_correctly() {
let input = indoc!(
"
-- 创建一个外部表,存储销售数据
CREATE EXTERNAL TABLE IF NOT EXISTS sales_data (
-- 唯一标识订单ID
order_id BIGINT COMMENT 'Unique identifier for the order',
-- 客户ID
customer_id BIGINT COMMENT 'Unique identifier for the customer',
)
COMMENT 'Sales data table for storing transaction records';
-- 按销售日期和城市进行分区
PARTITIONED BY (
sale_year STRING COMMENT 'Year of the sale',
sale_month STRING COMMENT 'Month of the sale'
)
-- 设置数据存储位置
LOCATION '/user/hive/warehouse/sales_data'
-- 使用 ORC 存储格式
STORED AS ORC
-- 设置表的行格式
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
-- 设置表属性
TBLPROPERTIES (
'orc.compress' = 'SNAPPY', -- 使用SNAPPY压缩
'transactional' = 'true', -- 启用事务支持
'orc.create.index' = 'true', -- 创建索引
'skip.header.line.count' = '1', -- 跳过CSV文件的第一行
'external.table.purge' = 'true' -- 在删除表时自动清理数据
);
-- 自动加载数据到 Hive 分区中
ALTER TABLE sales_data
ADD PARTITION (sale_year = '2024', sale_month = '08')
LOCATION '/user/hive/warehouse/sales_data/2024/08';"
);
// Default options except for the indent, which is four spaces.
let options = FormatOptions {
indent: Indent::Spaces(4),
..Default::default()
};
// The exact text the formatter is required to produce for `input`.
let expected = indoc!(
"
-- 创建一个外部表,存储销售数据
CREATE EXTERNAL TABLE IF NOT EXISTS sales_data (
-- 唯一标识订单ID
order_id BIGINT COMMENT 'Unique identifier for the order',
-- 客户ID
customer_id BIGINT COMMENT 'Unique identifier for the customer',
) COMMENT 'Sales data table for storing transaction records';
-- 按销售日期和城市进行分区
PARTITIONED BY (
sale_year STRING COMMENT 'Year of the sale',
sale_month STRING COMMENT 'Month of the sale'
)
-- 设置数据存储位置
LOCATION '/user/hive/warehouse/sales_data'
-- 使用 ORC 存储格式
STORED AS ORC
-- 设置表的行格式
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'
-- 设置表属性
TBLPROPERTIES (
'orc.compress' = 'SNAPPY', -- 使用SNAPPY压缩
'transactional' = 'true', -- 启用事务支持
'orc.create.index' = 'true', -- 创建索引
'skip.header.line.count' = '1', -- 跳过CSV文件的第一行
'external.table.purge' = 'true' -- 在删除表时自动清理数据
);
-- 自动加载数据到 Hive 分区中
ALTER TABLE
sales_data
ADD
PARTITION (sale_year = '2024', sale_month = '08') LOCATION '/user/hive/warehouse/sales_data/2024/08';"
);

assert_eq!(format(input, &QueryParams::None, options), expected);
}
}

0 comments on commit 78f24a6

Please sign in to comment.