Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

remove eviction param #2285

Merged
merged 1 commit into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions lmdeploy/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,6 @@ class PytorchEngineConfig:
by the k/v cache. For lmdeploy versions greater than `v0.2.1`,
it defaults to 0.8, signifying the percentage of FREE GPU memory
to be reserved for the k/v cache
eviction_type (str): What action to perform when kv cache
is full, ['recompute', 'copy'], Deprecated.
prefill_interval (int): Interval to perform prefill,
Default 16.
block_size (int): paging cache block size, default 64.
Expand All @@ -198,7 +196,6 @@ class PytorchEngineConfig:
session_len: int = None
max_batch_size: int = 128
cache_max_entry_count: float = 0.8
eviction_type: str = 'recompute'
prefill_interval: int = 16
block_size: int = 64
num_cpu_blocks: int = 0
Expand All @@ -216,8 +213,6 @@ def __post_init__(self):
assert self.tp >= 1, 'invalid tp'
assert self.max_batch_size >= 1, 'invalid max_batch_size'
assert self.cache_max_entry_count > 0 and self.cache_max_entry_count < 1, 'invalid cache_max_entry_count' # noqa
assert self.eviction_type in ('recompute',
'copy'), 'invalid eviction_type'
assert self.num_cpu_blocks >= 0, 'invalid num_cpu_blocks'
assert self.max_prefill_token_num >= 0, 'invalid max_prefill_token_num'
assert self.num_gpu_blocks >= 0, 'invalid num_gpu_blocks'
Expand Down
1 change: 0 additions & 1 deletion lmdeploy/pytorch/engine/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ def __init__(self,
scheduler_config = SchedulerConfig(
max_batches=engine_config.max_batch_size,
max_session_len=engine_config.session_len,
eviction_type=engine_config.eviction_type,
prefill_interval=engine_config.prefill_interval)

# block_size = 1 to enable unified paging
Expand Down
Loading