You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Traceback (most recent call last):
File "/home/abalogh/anaconda3/envs/text-generation-inference/bin/text-generation-server", line 8, in <module>
sys.exit(app())
^^^^^
File "/home/abalogh/work/work/contrib/text-generation-inference/server/text_generation_server/cli.py", line 78, in serve
server.serve(
File "/home/abalogh/work/work/contrib/text-generation-inference/server/text_generation_server/server.py", line 166, in serve
asyncio.run(
File "/home/abalogh/anaconda3/envs/text-generation-inference/lib/python3.11/asyncio/runners.py", line 190, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/home/abalogh/anaconda3/envs/text-generation-inference/lib/python3.11/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/abalogh/anaconda3/envs/text-generation-inference/lib/python3.11/asyncio/base_events.py", line 653, in run_until_complete
return future.result()
^^^^^^^^^^^^^^^
File "/home/abalogh/work/work/contrib/text-generation-inference/server/text_generation_server/server.py", line 133, in serve_inner
model = get_model(
^^^^^^^^^^
File "/home/abalogh/work/work/contrib/text-generation-inference/server/text_generation_server/models/__init__.py", line 215, in get_model
return FlashLlama(
^^^^^^^^^^^
File "/home/abalogh/work/work/contrib/text-generation-inference/server/text_generation_server/models/flash_llama.py", line 65, in __init__
model = FlashLlamaForCausalLM(config, weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/abalogh/work/work/contrib/text-generation-inference/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 366, in __init__
self.model = FlashLlamaModel(config, weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/abalogh/work/work/contrib/text-generation-inference/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 305, in __init__
[
File "/home/abalogh/work/work/contrib/text-generation-inference/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 306, in <listcomp>
FlashLlamaLayer(
File "/home/abalogh/work/work/contrib/text-generation-inference/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 241, in __init__
self.self_attn = FlashLlamaAttention(
^^^^^^^^^^^^^^^^^^^^
File "/home/abalogh/work/work/contrib/text-generation-inference/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 116, in __init__
self.query_key_value = TensorParallelColumnLinear.load_multi(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/abalogh/work/work/contrib/text-generation-inference/server/text_generation_server/utils/layers.py", line 238, in load_multi
weight = weights.get_multi_weights_col(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/abalogh/work/work/contrib/text-generation-inference/server/text_generation_server/utils/weights.py", line 123, in get_multi_weights_col
bits = self.get_tensor("gptq_bits").item()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/abalogh/work/work/contrib/text-generation-inference/server/text_generation_server/utils/weights.py", line 62, in get_tensor
filename, tensor_name = self.get_filename(tensor_name)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/abalogh/work/work/contrib/text-generation-inference/server/text_generation_server/utils/weights.py", line 49, in get_filename
raise RuntimeError(f"weight {tensor_name} does not exist")
RuntimeError: weight gptq_bits does not exist
The text was updated successfully, but these errors were encountered:
I am hitting the same problem: loading a GPTQ-quantized model with `--quantize gptq` fails with `RuntimeError: weight gptq_bits does not exist`, because the checkpoint's safetensors files do not contain the `gptq_bits` tensor that `get_multi_weights_col` expects. Reproduction:
model=Honkware/openchat_8192-GPTQ
text-generation-launcher --model-id $model --num-shard 1 --quantize gptq --port 8080
The text was updated successfully, but these errors were encountered: