Skip to content

Commit

Permalink
Switched to pre-built PagedAttentionExtension from openvinotoolkit/openvino_contrib#867. Minimized debug output.
Browse files Browse the repository at this point in the history
  • Loading branch information
slyalin committed Feb 7, 2024
1 parent 7234bc9 commit 23088ba
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions vllm/worker/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,26 +196,26 @@ def wrapper(module, target_op, *args, **kwargs):
extension=[
ModuleExtension(
PagedAttention,
extension=lambda module: 'PagedAttentionPlaceholder',
extension=lambda module: 'PagedAttentionExtension',
replacer=lambda module, *args, **kwargs: args[0],
wrapper=wrapper
)
),
'libuser_ov_extensions.so'
]
)

for input_name, input_data, input_tensor in zip(input_names, flatten_input, ov_model.inputs):
for input_data, input_tensor in zip(flatten_input, ov_model.inputs):
if input_tensor.element_type.is_dynamic():
input_tensor.get_node().set_element_type(ov_dtype_maping[input_data.dtype])
if input_tensor.partial_shape.rank.is_dynamic:
input_tensor.get_node().set_partial_shape(ov.PartialShape([-1]*input_data.ndim))
#input_tensor.get_tensor().set_names({input_name})

for out_name, out in zip(output_names, ov_model.outputs):
out.get_tensor().set_names({out_name})
ov_model.validate_nodes_and_infer_types()
#ov.save_model(ov_model, "vllm_openvino_model.xml")
print('>>>>>>>>>>>>> OV MODEL CONVERTED')
print(ov_model)
#print(ov_model)
ov_compiled = ov.compile_model(ov_model)

from functools import partial
Expand Down Expand Up @@ -243,6 +243,7 @@ def wrapper(*args, **kwargs):
inputs.append(input_metadata.block_tables)
#for input in inputs:
# print(f'{input.dtype} wiht shape {input.shape}' if isinstance(input, torch.Tensor) else type(input))
#print('input_metadata.slot_mapping:', input_metadata.slot_mapping)
result = ov_compiled(inputs, share_outputs=False)
#print(f'result: {type(result)}')
return torch.from_numpy(result[0])
Expand Down

0 comments on commit 23088ba

Please sign in to comment.