Skip to content

Commit

Permalink
feat: output of Copy. (#12594)
Browse files Browse the repository at this point in the history
* feat: output of Copy.

* update tests.
  • Loading branch information
youngsofun authored Aug 29, 2023
1 parent cad282e commit 0ca9a3c
Show file tree
Hide file tree
Showing 71 changed files with 699 additions and 110 deletions.
45 changes: 28 additions & 17 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/common/storage/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ async-backtrace = { workspace = true }
async-trait = "0.1"
bytes = "1"
chrono = { workspace = true }
dashmap = { version = "5.5.1", features = ["serde"] }
flagset = "0.4"
futures = "0.3"
log = { workspace = true }
Expand Down
108 changes: 108 additions & 0 deletions src/common/storage/src/copy.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Copyright 2021 Datafuse Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use common_exception::ErrorCode;
use dashmap::mapref::entry::Entry;
use dashmap::DashMap;
use serde::Deserialize;
use serde::Serialize;

#[derive(Default, Clone, Serialize, Deserialize)]
pub struct CopyStatus {
/// Key is file path.
pub files: DashMap<String, FileStatus>,
}

impl CopyStatus {
pub fn add_chunk(&self, file_path: &str, file_status: FileStatus) {
match self.files.entry(file_path.to_string()) {
Entry::Occupied(mut e) => {
e.get_mut().merge(file_status);
}
Entry::Vacant(e) => {
e.insert(file_status);
}
};
}

pub fn merge(&self, other: CopyStatus) {
for (k, v) in other.files.into_iter() {
self.add_chunk(&k, v);
}
}
}

#[derive(Default, Clone, Serialize, Deserialize)]
pub struct FileStatus {
pub num_rows_loaded: usize,
pub error: Option<FileErrorsInfo>,
}

impl FileStatus {
pub fn add_error(&mut self, e: ErrorCode, line: usize) {
match &mut self.error {
None => {
self.error = Some(FileErrorsInfo {
num_errors: 1,
first_error: FileErrorInfo {
code: e.code(),
message: e.message(),
line,
},
});
}
Some(info) => {
info.num_errors += 1;
if info.first_error.line > line {
info.first_error = FileErrorInfo {
code: e.code(),
message: e.message(),
line,
};
}
}
};
}

fn merge(&mut self, other: FileStatus) {
self.num_rows_loaded += other.num_rows_loaded;
match (&mut self.error, other.error) {
(None, Some(e)) => self.error = Some(e),
(Some(e1), Some(e2)) => e1.merge(e2),
_ => {}
}
}
}

#[derive(Default, Clone, Serialize, Deserialize)]
pub struct FileErrorsInfo {
pub num_errors: usize,
pub first_error: FileErrorInfo,
}

impl FileErrorsInfo {
fn merge(&mut self, other: FileErrorsInfo) {
self.num_errors += other.num_errors;
if self.first_error.line > other.first_error.line {
self.first_error = other.first_error;
}
}
}

#[derive(Default, Clone, Serialize, Deserialize)]
pub struct FileErrorInfo {
pub code: u16,
pub message: String,
pub line: usize,
}
4 changes: 4 additions & 0 deletions src/common/storage/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ pub use stage::StageFileInfo;
pub use stage::StageFileStatus;
pub use stage::StageFilesInfo;

mod copy;
mod statistics;

pub use copy::CopyStatus;
pub use copy::FileStatus;
pub use statistics::Datum;
pub use statistics::F64;
6 changes: 6 additions & 0 deletions src/query/catalog/src/table_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ use common_meta_app::principal::UserInfo;
use common_pipeline_core::InputError;
use common_settings::ChangeValue;
use common_settings::Settings;
use common_storage::CopyStatus;
use common_storage::DataOperator;
use common_storage::FileStatus;
use common_storage::StageFileInfo;
use common_storage::StorageMetrics;
use dashmap::DashMap;
Expand Down Expand Up @@ -197,4 +199,8 @@ pub trait TableContext: Send + Sync {
fn add_segment_location(&self, segment_loc: Location) -> Result<()>;

fn get_segment_locations(&self) -> Result<Vec<Location>>;

fn add_file_status(&self, file_path: &str, file_status: FileStatus) -> Result<()>;

fn get_copy_status(&self) -> Arc<CopyStatus>;
}
Loading

1 comment on commit 0ca9a3c

@vercel
Copy link

@vercel vercel bot commented on 0ca9a3c Aug 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

databend – ./

databend.vercel.app
databend-git-main-databend.vercel.app
databend.rs
databend-databend.vercel.app

Please sign in to comment.