-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Deprecate RuntimeConfig
, update code to use new builder style
#13635
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,8 +23,8 @@ use std::sync::{Arc, OnceLock}; | |
|
||
use datafusion::error::{DataFusionError, Result}; | ||
use datafusion::execution::context::SessionConfig; | ||
use datafusion::execution::memory_pool::{FairSpillPool, GreedyMemoryPool}; | ||
use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; | ||
use datafusion::execution::memory_pool::{FairSpillPool, GreedyMemoryPool, MemoryPool}; | ||
use datafusion::execution::runtime_env::RuntimeEnvBuilder; | ||
use datafusion::prelude::SessionContext; | ||
use datafusion_cli::catalog::DynamicObjectStoreCatalog; | ||
use datafusion_cli::functions::ParquetMetadataFunc; | ||
|
@@ -156,27 +156,22 @@ async fn main_inner() -> Result<()> { | |
session_config = session_config.with_batch_size(batch_size); | ||
}; | ||
|
||
let rt_config = RuntimeConfig::new(); | ||
let rt_config = | ||
// set memory pool size | ||
if let Some(memory_limit) = args.memory_limit { | ||
// set memory pool type | ||
match args.mem_pool_type { | ||
PoolType::Fair => rt_config | ||
.with_memory_pool(Arc::new(FairSpillPool::new(memory_limit))), | ||
PoolType::Greedy => rt_config | ||
.with_memory_pool(Arc::new(GreedyMemoryPool::new(memory_limit))) | ||
} | ||
} else { | ||
rt_config | ||
let mut rt_builder = RuntimeEnvBuilder::new(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this code is now clearer -- previously the |
||
// set memory pool size | ||
if let Some(memory_limit) = args.memory_limit { | ||
// set memory pool type | ||
let pool: Arc<dyn MemoryPool> = match args.mem_pool_type { | ||
PoolType::Fair => Arc::new(FairSpillPool::new(memory_limit)), | ||
PoolType::Greedy => Arc::new(GreedyMemoryPool::new(memory_limit)), | ||
}; | ||
rt_builder = rt_builder.with_memory_pool(pool) | ||
} | ||
|
||
let runtime_env = create_runtime_env(rt_config.clone())?; | ||
let runtime_env = rt_builder.build_arc()?; | ||
|
||
// enable dynamic file query | ||
let ctx = | ||
SessionContext::new_with_config_rt(session_config.clone(), Arc::new(runtime_env)) | ||
.enable_url_table(); | ||
let ctx = SessionContext::new_with_config_rt(session_config, runtime_env) | ||
.enable_url_table(); | ||
ctx.refresh_catalogs().await?; | ||
// install dynamic catalog provider that can register required object stores | ||
ctx.register_catalog_list(Arc::new(DynamicObjectStoreCatalog::new( | ||
|
@@ -231,10 +226,6 @@ async fn main_inner() -> Result<()> { | |
Ok(()) | ||
} | ||
|
||
fn create_runtime_env(rn_config: RuntimeConfig) -> Result<RuntimeEnv> { | ||
RuntimeEnv::try_new(rn_config) | ||
} | ||
|
||
fn parse_valid_file(dir: &str) -> Result<String, String> { | ||
if Path::new(dir).is_file() { | ||
Ok(dir.to_string()) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -753,7 +753,6 @@ mod tests { | |
use datafusion_common::cast::as_string_array; | ||
use datafusion_common::internal_err; | ||
use datafusion_common::stats::Precision; | ||
use datafusion_execution::runtime_env::RuntimeEnvBuilder; | ||
use datafusion_expr::{col, lit}; | ||
|
||
use chrono::DateTime; | ||
|
@@ -984,12 +983,10 @@ mod tests { | |
async fn query_compress_data( | ||
file_compression_type: FileCompressionType, | ||
) -> Result<()> { | ||
let runtime = Arc::new(RuntimeEnvBuilder::new().build()?); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This simply creates a default RuntimeEnv which the SessionStateBuilder already does, so there is no need to do it again |
||
let mut cfg = SessionConfig::new(); | ||
cfg.options_mut().catalog.has_header = true; | ||
let session_state = SessionStateBuilder::new() | ||
.with_config(cfg) | ||
.with_runtime_env(runtime) | ||
.with_default_features() | ||
.build(); | ||
let integration = LocalFileSystem::new_with_prefix(arrow_test_data()).unwrap(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1792,7 +1792,6 @@ mod tests { | |
use super::{super::options::CsvReadOptions, *}; | ||
use crate::assert_batches_eq; | ||
use crate::execution::memory_pool::MemoryConsumer; | ||
use crate::execution::runtime_env::RuntimeEnvBuilder; | ||
use crate::test; | ||
use crate::test_util::{plan_and_collect, populate_csv_partitions}; | ||
use arrow_schema::{DataType, TimeUnit}; | ||
|
@@ -1932,14 +1931,12 @@ mod tests { | |
let path = path.join("tests/tpch-csv"); | ||
let url = format!("file://{}", path.display()); | ||
|
||
let runtime = RuntimeEnvBuilder::new().build_arc()?; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. likewise here, this is the default runtime env, it isn't needed |
||
let cfg = SessionConfig::new() | ||
.set_str("datafusion.catalog.location", url.as_str()) | ||
.set_str("datafusion.catalog.format", "CSV") | ||
.set_str("datafusion.catalog.has_header", "true"); | ||
let session_state = SessionStateBuilder::new() | ||
.with_config(cfg) | ||
.with_runtime_env(runtime) | ||
.with_default_features() | ||
.build(); | ||
let ctx = SessionContext::new_with_state(session_state); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,13 +41,32 @@ use url::Url; | |
/// Execution runtime environment that manages system resources such | ||
/// as memory, disk, cache and storage. | ||
/// | ||
/// A [`RuntimeEnv`] is created from a [`RuntimeEnvBuilder`] and has the | ||
/// A [`RuntimeEnv`] can be created using [`RuntimeEnvBuilder`] and has the | ||
/// following resource management functionality: | ||
/// | ||
/// * [`MemoryPool`]: Manage memory | ||
/// * [`DiskManager`]: Manage temporary files on local disk | ||
/// * [`CacheManager`]: Manage temporary cache data during the session lifetime | ||
/// * [`ObjectStoreRegistry`]: Manage mapping URLs to object store instances | ||
/// | ||
/// # Example: Create default `RuntimeEnv` | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. new doc examples |
||
/// ``` | ||
/// # use datafusion_execution::runtime_env::RuntimeEnv; | ||
/// let runtime_env = RuntimeEnv::default(); | ||
/// ``` | ||
/// | ||
/// # Example: Create a `RuntimeEnv` from [`RuntimeEnvBuilder`] with a new memory pool | ||
/// ``` | ||
/// # use std::sync::Arc; | ||
/// # use datafusion_execution::memory_pool::GreedyMemoryPool; | ||
/// # use datafusion_execution::runtime_env::{RuntimeEnv, RuntimeEnvBuilder}; | ||
/// // restrict to using at most 100MB of memory | ||
/// let pool_size = 100 * 1024 * 1024; | ||
/// let runtime_env = RuntimeEnvBuilder::new() | ||
/// .with_memory_pool(Arc::new(GreedyMemoryPool::new(pool_size))) | ||
/// .build() | ||
/// .unwrap(); | ||
/// ``` | ||
pub struct RuntimeEnv { | ||
/// Runtime memory management | ||
pub memory_pool: Arc<dyn MemoryPool>, | ||
|
@@ -66,28 +85,16 @@ impl Debug for RuntimeEnv { | |
} | ||
|
||
impl RuntimeEnv { | ||
#[deprecated(since = "43.0.0", note = "please use `try_new` instead")] | ||
#[deprecated(since = "43.0.0", note = "please use `RuntimeEnvBuilder` instead")] | ||
#[allow(deprecated)] | ||
pub fn new(config: RuntimeConfig) -> Result<Self> { | ||
Self::try_new(config) | ||
} | ||
/// Create env based on configuration | ||
#[deprecated(since = "44.0.0", note = "please use `RuntimeEnvBuilder` instead")] | ||
#[allow(deprecated)] | ||
pub fn try_new(config: RuntimeConfig) -> Result<Self> { | ||
let RuntimeConfig { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this code is literally the same as RuntimeConfig::build so just call that directly |
||
memory_pool, | ||
disk_manager, | ||
cache_manager, | ||
object_store_registry, | ||
} = config; | ||
|
||
let memory_pool = | ||
memory_pool.unwrap_or_else(|| Arc::new(UnboundedMemoryPool::default())); | ||
|
||
Ok(Self { | ||
memory_pool, | ||
disk_manager: DiskManager::try_new(disk_manager)?, | ||
cache_manager: CacheManager::try_new(&cache_manager)?, | ||
object_store_registry, | ||
}) | ||
config.build() | ||
} | ||
|
||
/// Registers a custom `ObjectStore` to be used with a specific url. | ||
|
@@ -104,7 +111,7 @@ impl RuntimeEnv { | |
/// # use std::sync::Arc; | ||
/// # use url::Url; | ||
/// # use datafusion_execution::runtime_env::RuntimeEnv; | ||
/// # let runtime_env = RuntimeEnv::try_new(Default::default()).unwrap(); | ||
/// # let runtime_env = RuntimeEnv::default(); | ||
/// let url = Url::try_from("file://").unwrap(); | ||
/// let object_store = object_store::local::LocalFileSystem::new(); | ||
/// // register the object store with the runtime environment | ||
|
@@ -119,11 +126,12 @@ impl RuntimeEnv { | |
/// # use std::sync::Arc; | ||
/// # use url::Url; | ||
/// # use datafusion_execution::runtime_env::RuntimeEnv; | ||
/// # let runtime_env = RuntimeEnv::try_new(Default::default()).unwrap(); | ||
/// # let runtime_env = RuntimeEnv::default(); | ||
/// # // use local store for example as http feature is not enabled | ||
/// # let http_store = object_store::local::LocalFileSystem::new(); | ||
/// // create a new object store via object_store::http::HttpBuilder; | ||
/// let base_url = Url::parse("https://github.com").unwrap(); | ||
/// // (note this example can't depend on the http feature) | ||
/// // let http_store = HttpBuilder::new() | ||
/// // .with_url(base_url.clone()) | ||
/// // .build() | ||
|
@@ -155,12 +163,15 @@ impl Default for RuntimeEnv { | |
} | ||
} | ||
|
||
/// Please see: <https://github.com/apache/datafusion/issues/12156> | ||
/// Please see: <https://github.com/apache/datafusion/issues/12156a> | ||
comphead marked this conversation as resolved.
Show resolved
Hide resolved
|
||
/// This a type alias for backwards compatibility. | ||
#[deprecated(since = "43.0.0", note = "please use `RuntimeEnvBuilder` instead")] | ||
pub type RuntimeConfig = RuntimeEnvBuilder; | ||
|
||
#[derive(Clone)] | ||
/// Execution runtime configuration | ||
/// Execution runtime configuration builder. | ||
/// | ||
/// See example on [`RuntimeEnv`] | ||
pub struct RuntimeEnvBuilder { | ||
/// DiskManager to manage temporary disk file usage | ||
pub disk_manager: DiskManagerConfig, | ||
|
@@ -239,15 +250,20 @@ impl RuntimeEnvBuilder { | |
|
||
/// Build a RuntimeEnv | ||
pub fn build(self) -> Result<RuntimeEnv> { | ||
let memory_pool = self | ||
.memory_pool | ||
.unwrap_or_else(|| Arc::new(UnboundedMemoryPool::default())); | ||
let Self { | ||
disk_manager, | ||
memory_pool, | ||
cache_manager, | ||
object_store_registry, | ||
} = self; | ||
let memory_pool = | ||
memory_pool.unwrap_or_else(|| Arc::new(UnboundedMemoryPool::default())); | ||
|
||
Ok(RuntimeEnv { | ||
memory_pool, | ||
disk_manager: DiskManager::try_new(self.disk_manager)?, | ||
cache_manager: CacheManager::try_new(&self.cache_manager)?, | ||
object_store_registry: self.object_store_registry, | ||
disk_manager: DiskManager::try_new(disk_manager)?, | ||
cache_manager: CacheManager::try_new(&cache_manager)?, | ||
object_store_registry, | ||
}) | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,10 +16,8 @@ | |
// under the License. | ||
|
||
use crate::{ | ||
config::SessionConfig, | ||
memory_pool::MemoryPool, | ||
registry::FunctionRegistry, | ||
runtime_env::{RuntimeEnv, RuntimeEnvBuilder}, | ||
config::SessionConfig, memory_pool::MemoryPool, registry::FunctionRegistry, | ||
runtime_env::RuntimeEnv, | ||
}; | ||
use datafusion_common::{plan_datafusion_err, DataFusionError, Result}; | ||
use datafusion_expr::planner::ExprPlanner; | ||
|
@@ -54,9 +52,7 @@ pub struct TaskContext { | |
|
||
impl Default for TaskContext { | ||
fn default() -> Self { | ||
let runtime = RuntimeEnvBuilder::new() | ||
.build_arc() | ||
.expect("default runtime created successfully"); | ||
let runtime = Arc::new(RuntimeEnv::default()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this does the same thing, just makes the intent clearer |
||
|
||
// Create a default task context, mostly useful for testing | ||
Self { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this follows the new builder style model more fully