-
-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* initial integration * update nb * add qwen gptq and awq models * add 72b models * update readme * update nb * update nb
- Loading branch information
Showing
4 changed files
with
345 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-style: italic\"> Available Models </span>\n", | ||
"┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓\n", | ||
"┃<span style=\"font-weight: bold\"> Implementation </span>┃<span style=\"font-weight: bold\"> Model ID </span>┃<span style=\"font-weight: bold\"> Input --> Output </span>┃\n", | ||
"┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩\n", | ||
"│<span style=\"color: #008080; text-decoration-color: #008080\"> transformers </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8 </span>│<span style=\"color: #008000; text-decoration-color: #008000\"> image-text --> text </span>│\n", | ||
"│<span style=\"color: #008080; text-decoration-color: #008080\"> transformers </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4 </span>│<span style=\"color: #008000; text-decoration-color: #008000\"> image-text --> text </span>│\n", | ||
"│<span style=\"color: #008080; text-decoration-color: #008080\"> transformers </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> Qwen/Qwen2-VL-72B-Instruct-AWQ </span>│<span style=\"color: #008000; text-decoration-color: #008000\"> image-text --> text </span>│\n", | ||
"│<span style=\"color: #008080; text-decoration-color: #008080\"> transformers </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> Qwen/Qwen2-VL-72B-Instruct </span>│<span style=\"color: #008000; text-decoration-color: #008000\"> image-text --> text </span>│\n", | ||
"│<span style=\"color: #008080; text-decoration-color: #008080\"> transformers </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8 </span>│<span style=\"color: #008000; text-decoration-color: #008000\"> image-text --> text </span>│\n", | ||
"│<span style=\"color: #008080; text-decoration-color: #008080\"> transformers </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4 </span>│<span style=\"color: #008000; text-decoration-color: #008000\"> image-text --> text </span>│\n", | ||
"│<span style=\"color: #008080; text-decoration-color: #008080\"> transformers </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> Qwen/Qwen2-VL-7B-Instruct-AWQ </span>│<span style=\"color: #008000; text-decoration-color: #008000\"> image-text --> text </span>│\n", | ||
"│<span style=\"color: #008080; text-decoration-color: #008080\"> transformers </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> Qwen/Qwen2-VL-7B-Instruct </span>│<span style=\"color: #008000; text-decoration-color: #008000\"> image-text --> text </span>│\n", | ||
"│<span style=\"color: #008080; text-decoration-color: #008080\"> transformers </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8 </span>│<span style=\"color: #008000; text-decoration-color: #008000\"> image-text --> text </span>│\n", | ||
"│<span style=\"color: #008080; text-decoration-color: #008080\"> transformers </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4 </span>│<span style=\"color: #008000; text-decoration-color: #008000\"> image-text --> text </span>│\n", | ||
"│<span style=\"color: #008080; text-decoration-color: #008080\"> transformers </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> Qwen/Qwen2-VL-2B-Instruct-AWQ </span>│<span style=\"color: #008000; text-decoration-color: #008000\"> image-text --> text </span>│\n", | ||
"│<span style=\"color: #008080; text-decoration-color: #008080\"> transformers </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> Qwen/Qwen2-VL-2B-Instruct </span>│<span style=\"color: #008000; text-decoration-color: #008000\"> image-text --> text </span>│\n", | ||
"└────────────────┴──────────────────────────────────────┴─────────────────────┘\n", | ||
"</pre>\n" | ||
], | ||
"text/plain": [ | ||
"\u001b[3m Available Models \u001b[0m\n", | ||
"┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓\n", | ||
"┃\u001b[1m \u001b[0m\u001b[1mImplementation\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mModel ID \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mInput --> Output \u001b[0m\u001b[1m \u001b[0m┃\n", | ||
"┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩\n", | ||
"│\u001b[36m \u001b[0m\u001b[36mtransformers \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35mQwen/Qwen2-VL-72B-Instruct-GPTQ-Int8\u001b[0m\u001b[35m \u001b[0m│\u001b[32m \u001b[0m\u001b[32mimage-text --> text\u001b[0m\u001b[32m \u001b[0m│\n", | ||
"│\u001b[36m \u001b[0m\u001b[36mtransformers \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35mQwen/Qwen2-VL-72B-Instruct-GPTQ-Int4\u001b[0m\u001b[35m \u001b[0m│\u001b[32m \u001b[0m\u001b[32mimage-text --> text\u001b[0m\u001b[32m \u001b[0m│\n", | ||
"│\u001b[36m \u001b[0m\u001b[36mtransformers \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35mQwen/Qwen2-VL-72B-Instruct-AWQ \u001b[0m\u001b[35m \u001b[0m│\u001b[32m \u001b[0m\u001b[32mimage-text --> text\u001b[0m\u001b[32m \u001b[0m│\n", | ||
"│\u001b[36m \u001b[0m\u001b[36mtransformers \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35mQwen/Qwen2-VL-72B-Instruct \u001b[0m\u001b[35m \u001b[0m│\u001b[32m \u001b[0m\u001b[32mimage-text --> text\u001b[0m\u001b[32m \u001b[0m│\n", | ||
"│\u001b[36m \u001b[0m\u001b[36mtransformers \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35mQwen/Qwen2-VL-7B-Instruct-GPTQ-Int8 \u001b[0m\u001b[35m \u001b[0m│\u001b[32m \u001b[0m\u001b[32mimage-text --> text\u001b[0m\u001b[32m \u001b[0m│\n", | ||
"│\u001b[36m \u001b[0m\u001b[36mtransformers \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35mQwen/Qwen2-VL-7B-Instruct-GPTQ-Int4 \u001b[0m\u001b[35m \u001b[0m│\u001b[32m \u001b[0m\u001b[32mimage-text --> text\u001b[0m\u001b[32m \u001b[0m│\n", | ||
"│\u001b[36m \u001b[0m\u001b[36mtransformers \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35mQwen/Qwen2-VL-7B-Instruct-AWQ \u001b[0m\u001b[35m \u001b[0m│\u001b[32m \u001b[0m\u001b[32mimage-text --> text\u001b[0m\u001b[32m \u001b[0m│\n", | ||
"│\u001b[36m \u001b[0m\u001b[36mtransformers \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35mQwen/Qwen2-VL-7B-Instruct \u001b[0m\u001b[35m \u001b[0m│\u001b[32m \u001b[0m\u001b[32mimage-text --> text\u001b[0m\u001b[32m \u001b[0m│\n", | ||
"│\u001b[36m \u001b[0m\u001b[36mtransformers \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35mQwen/Qwen2-VL-2B-Instruct-GPTQ-Int8 \u001b[0m\u001b[35m \u001b[0m│\u001b[32m \u001b[0m\u001b[32mimage-text --> text\u001b[0m\u001b[32m \u001b[0m│\n", | ||
"│\u001b[36m \u001b[0m\u001b[36mtransformers \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35mQwen/Qwen2-VL-2B-Instruct-GPTQ-Int4 \u001b[0m\u001b[35m \u001b[0m│\u001b[32m \u001b[0m\u001b[32mimage-text --> text\u001b[0m\u001b[32m \u001b[0m│\n", | ||
"│\u001b[36m \u001b[0m\u001b[36mtransformers \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35mQwen/Qwen2-VL-2B-Instruct-AWQ \u001b[0m\u001b[35m \u001b[0m│\u001b[32m \u001b[0m\u001b[32mimage-text --> text\u001b[0m\u001b[32m \u001b[0m│\n", | ||
"│\u001b[36m \u001b[0m\u001b[36mtransformers \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35mQwen/Qwen2-VL-2B-Instruct \u001b[0m\u001b[35m \u001b[0m│\u001b[32m \u001b[0m\u001b[32mimage-text --> text\u001b[0m\u001b[32m \u001b[0m│\n", | ||
"└────────────────┴──────────────────────────────────────┴─────────────────────┘\n" | ||
] | ||
}, | ||
"metadata": {}, | ||
"output_type": "display_data" | ||
} | ||
], | ||
"source": [ | ||
"import xinfer\n", | ||
"\n", | ||
"xinfer.list_models(\"qwen2\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"\u001b[32m2024-11-20 23:07:01.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxinfer.models\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m63\u001b[0m - \u001b[1mModel: Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4\u001b[0m\n", | ||
"\u001b[32m2024-11-20 23:07:01.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxinfer.models\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mDevice: cuda\u001b[0m\n", | ||
"\u001b[32m2024-11-20 23:07:01.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxinfer.models\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m65\u001b[0m - \u001b[1mDtype: auto\u001b[0m\n", | ||
"/home/dnth/mambaforge-pypy3/envs/xinfer/lib/python3.10/site-packages/auto_gptq/nn_modules/triton_utils/kernels.py:411: FutureWarning: `torch.cuda.amp.custom_fwd(args...)` is deprecated. Please use `torch.amp.custom_fwd(args..., device_type='cuda')` instead.\n", | ||
" def forward(ctx, input, qweight, scales, qzeros, g_idx, bits, maxq):\n", | ||
"/home/dnth/mambaforge-pypy3/envs/xinfer/lib/python3.10/site-packages/auto_gptq/nn_modules/triton_utils/kernels.py:419: FutureWarning: `torch.cuda.amp.custom_bwd(args...)` is deprecated. Please use `torch.amp.custom_bwd(args..., device_type='cuda')` instead.\n", | ||
" def backward(ctx, grad_output):\n", | ||
"/home/dnth/mambaforge-pypy3/envs/xinfer/lib/python3.10/site-packages/auto_gptq/nn_modules/triton_utils/kernels.py:461: FutureWarning: `torch.cuda.amp.custom_fwd(args...)` is deprecated. Please use `torch.amp.custom_fwd(args..., device_type='cuda')` instead.\n", | ||
" @custom_fwd(cast_inputs=torch.float16)\n", | ||
"CUDA extension not installed.\n", | ||
"CUDA extension not installed.\n", | ||
"/home/dnth/mambaforge-pypy3/envs/xinfer/lib/python3.10/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n", | ||
" warnings.warn(\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"model = xinfer.create_model(\"Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4\", device=\"cuda\", dtype=\"auto\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"'The image features a plush toy resembling a rabbit. The toy is dressed in a light-colored outfit with green and yellow floral patterns. It has a blue hat with a bow on top and is sitting on a fluffy, pink surface. The background is a solid pink color, providing a simple and clean backdrop that highlights the toy.'" | ||
] | ||
}, | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"image1 = \"https://raw.githubusercontent.com/dnth/x.infer/main/assets/demo/000b9c365c9e307a.jpg\"\n", | ||
"image2 = \"https://raw.githubusercontent.com/dnth/x.infer/main/assets/demo/00aa2580828a9009.jpg\"\n", | ||
"\n", | ||
"prompt = \"Describe the image.\"\n", | ||
"\n", | ||
"model.infer(image1, prompt).text" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"[Result(categories=None, boxes=None, masks=None, poses=None, text='The image features a plush toy resembling a rabbit. The toy is dressed in a light-colored outfit with green and yellow floral patterns. It has a blue hat with a bow on top and is sitting on a fluffy, pink surface. The background is a solid pink color, providing a simple and clean backdrop that highlights the toy.'),\n", | ||
" Result(categories=None, boxes=None, masks=None, poses=None, text='The image depicts a lively street scene with a parade taking place. In the foreground, a man is clapping, while another person is gesturing with their hand. The street is lined with spectators, and there are several flags and vehicles participating in the parade. The background features a clock tower and a church with a steeple, indicating a town or city setting. The weather appears overcast, and the overall atmosphere is festive and community-oriented.')]" | ||
] | ||
}, | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"model.infer_batch([image1, image2], prompt)\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "xinfer", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.15" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.