update python api readme (#504)
zhenwei-intel authored Oct 20, 2023
1 parent b9155ef commit 5f4175a
Showing 15 changed files with 158 additions and 122 deletions.
127 changes: 80 additions & 47 deletions intel_extension_for_transformers/llm/runtime/graph/README.md

Large diffs are not rendered by default.

@@ -152,10 +152,10 @@ def baichuan13B_convert(model, tokenizer, dir_model, fname_out, ftype, hparams):
fout.write(struct.pack("i", 0))
fout.write(struct.pack("i", hparams["intermediate_size"]))

- fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id else 1))
- fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id else 2))
- fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id else -1))
- fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id else -1))
+ fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
+ fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
+ fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
+ fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id is not None else -1))

vocab = load_vocab_for_baichuan(Path(dir_model))
counter = 0
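
The fix above matters because a special-token ID of 0 is valid in some vocabularies but falsy in Python, so the old truthiness check would silently replace a real ID 0 with the hard-coded fallback; comparing against None only falls back when the tokenizer truly has no such token. A minimal sketch of the pitfall (hypothetical tokenizer values, not from this repo):

    # Hypothetical tokenizer whose BOS token id is 0.
    class FakeTokenizer:
        bos_token_id = 0

    tok = FakeTokenizer()

    # Old pattern: 0 is falsy, so the fallback 1 is chosen instead of the real id.
    assert (tok.bos_token_id if tok.bos_token_id else 1) == 1
    # Fixed pattern: only a missing (None) id triggers the fallback.
    assert (tok.bos_token_id if tok.bos_token_id is not None else 1) == 0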
@@ -95,10 +95,10 @@ def main(args_in: Optional[List[str]] = None) -> None:
fout.write(struct.pack("i", 0))
fout.write(struct.pack("i", 0))

- fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id else 1))
- fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id else 2))
- fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id else -1))
- fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id else -1))
+ fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
+ fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
+ fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
+ fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id is not None else -1))

reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
byte_encoder = bytes_to_unicode()
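
As in the other converters, each ID is written with struct.pack("i", ...), i.e. one 4-byte native-endian signed integer, so the four special-token IDs occupy 16 consecutive bytes of the header. A self-contained round-trip sketch (in-memory buffer, not the real NE file layout):

    import io
    import struct

    buf = io.BytesIO()
    for tid in (1, 2, -1, -1):  # bos, eos, pad, sep fallback values
        buf.write(struct.pack("i", tid))

    buf.seek(0)
    bos_id, eos_id, pad_id, sep_id = struct.unpack("4i", buf.read(16))
    assert (bos_id, eos_id, pad_id, sep_id) == (1, 2, -1, -1)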
@@ -170,10 +170,10 @@ def chatglm2_convert(model, tokenizer, dir_model, fname_out, ftype, hparams):
fout.write(struct.pack("i", hparams["ffn_hidden_size"]))
fout.write(struct.pack("i", 0))

- fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id else 1))
- fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id else 2))
- fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id else -1))
- fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id else -1))
+ fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
+ fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
+ fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
+ fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id is not None else -1))


vocab = load_vocab_for_glm2(Path(dir_model))
@@ -113,10 +113,10 @@ def main(args_in: Optional[List[str]] = None) -> None:
fout.write(struct.pack("i", 0))
fout.write(struct.pack("i", 0))

- fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id else 1))
- fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id else 2))
- fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id else -1))
- fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id else -1))
+ fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
+ fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
+ fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
+ fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id is not None else -1))

# Is this correct??
dot_token = tokenizer.encode(".")[0]
@@ -103,10 +103,10 @@ def main(args_in: Optional[List[str]] = None) -> None:
fout.write(struct.pack("i", 0))
fout.write(struct.pack("i", 0))

- fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id else 1))
- fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id else 2))
- fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id else -1))
- fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id else -1))
+ fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
+ fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
+ fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
+ fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id is not None else -1))

reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
byte_encoder = bytes_to_unicode()
@@ -98,10 +98,10 @@ def main(args_in: Optional[List[str]] = None) -> None:
fout.write(struct.pack("i", 0))
fout.write(struct.pack("i", 0))

- fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id else 1))
- fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id else 2))
- fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id else -1))
- fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id else -1))
+ fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
+ fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
+ fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
+ fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id is not None else -1))

byte_encoder = bytes_to_unicode()
byte_decoder = {v:k for k, v in byte_encoder.items()}
@@ -113,10 +113,10 @@ def main(args_in: Optional[List[str]] = None) -> None:
fout.write(struct.pack("i", 0))
fout.write(struct.pack("i", 0))

- fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id else 1))
- fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id else 2))
- fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id else -1))
- fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id else -1))
+ fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
+ fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
+ fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
+ fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id is not None else -1))

# write vocab
vocab_size = hparams["vocab_size"]
@@ -99,10 +99,10 @@ def main(args_in: Optional[List[str]] = None) -> None:
fout.write(struct.pack("i", 0))
fout.write(struct.pack("i", 0))

- fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id else 1))
- fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id else 2))
- fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id else -1))
- fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id else -1))
+ fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
+ fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
+ fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
+ fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id is not None else -1))

vocab_size = hparams["vocab_size"]

@@ -105,10 +105,10 @@ def main(args_in: Optional[List[str]] = None) -> None:
fout.write(struct.pack("i", 0))
fout.write(struct.pack("i", 0))

- fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id else 1))
- fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id else 2))
- fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id else -1))
- fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id else -1))
+ fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
+ fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
+ fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
+ fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id is not None else -1))

vocab_size = hparams["vocab_size"]
encoder = tokenizer.vocab
@@ -109,10 +109,10 @@ def main(args_in: Optional[List[str]] = None) -> None:
fout.write(struct.pack("i", 0))
fout.write(struct.pack("i", 0))

- fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id else 1))
- fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id else 2))
- fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id else -1))
- fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id else -1))
+ fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
+ fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
+ fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
+ fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id is not None else -1))

byte_encoder = bytes_to_unicode()
byte_decoder = {v:k for k, v in byte_encoder.items()}
@@ -24,63 +24,63 @@

def main(args_in: Optional[List[str]] = None) -> None:
parser = argparse.ArgumentParser(description="main program llm running")
- parser.add_argument("--model_name", type=str, help="model name", required=True)
- parser.add_argument("-m", "--model", type=Path, help="path ne model", required=True)
+ parser.add_argument("--model_name", type=str, help="Model name: String", required=True)
+ parser.add_argument("-m", "--model", type=Path, help="Path to the executed model: String", required=True)
parser.add_argument(
- "--build_dir", type=Path, help="path to build directory", default=build_path
+ "--build_dir", type=Path, help="Path to the build file: String", default=build_path
)
parser.add_argument(
"-p",
"--prompt",
type=str,
- help="prompt to start generation with (default: empty)",
+ help="Prompt to start generation with: String (default: empty)",
default="",
)
parser.add_argument(
"--tokenizer",
type=str,
- help="the path of the chatglm tokenizer",
+ help="The path of the chatglm tokenizer: String (default: THUDM/chatglm-6b)",
default="THUDM/chatglm-6b",
)
parser.add_argument(
"-n",
"--n_predict",
type=int,
- help="number of tokens to predict (default: -1, -1 = infinity)",
+ help="Number of tokens to predict: Int (default: -1, -1 = infinity)",
default=-1,
)
parser.add_argument(
"-t",
"--threads",
type=int,
- help="number of threads to use during computation (default: 56)",
+ help="Number of threads to use during computation: Int (default: 56)",
default=56,
)
parser.add_argument(
"-b",
"--batch_size_truncate",
type=int,
- help="batch size for prompt processing (default: 512)",
+ help="Batch size for prompt processing: Int (default: 512)",
default=512,
)
parser.add_argument(
"-c",
"--ctx_size",
type=int,
- help="size of the prompt context (default: 512)",
+ help="Size of the prompt context: Int (default: 512, cannot be larger than the specific model's context window length)",
default=512,
)
parser.add_argument(
"-s",
"--seed",
type=int,
- help="NG seed (default: -1, use random seed for < 0)",
+ help="NG seed: Int (default: -1, use random seed for < 0)",
default=-1,
)
parser.add_argument(
"--repeat_penalty",
type=float,
- help="penalize repeat sequence of tokens (default: 1.1, 1.0 = disabled)",
+ help="Penalize repeat sequence of tokens: Float (default: 1.1, 1.0 = disabled)",
default=1.1,
)
parser.add_argument(
@@ -91,7 +91,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
parser.add_argument(
"--keep",
type=int,
- help="number of tokens to keep from the initial prompt (default: 0, -1 = all)",
+ help="Number of tokens to keep from the initial prompt: Int (default: 0, -1 = all)",
default=0,
)
parser.add_argument(
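
A hypothetical invocation of this chatglm driver, using only flags that appear in this hunk; the script name and model file name are assumptions, since neither is shown in the diff:

    python chatglm.py --model_name chatglm -m ne_chatglm_f32.bin \
        --tokenizer THUDM/chatglm-6b -p "She opened the door and" \
        -n 32 -t 8 -c 512 -s 1234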
@@ -35,48 +35,48 @@ def main(args_in: Optional[List[str]] = None) -> None:
parser = argparse.ArgumentParser(description="Quantize weights of NE files")
parser.add_argument("--model_name", type=str, help="model name", required=True)
parser.add_argument(
- "--model_file", type=Path, help="path to the fp32 model", required=True
+ "--model_file", type=Path, help="Path to the fp32 model: String", required=True
)
parser.add_argument(
- "--out_file", type=Path, help="path to the quantized model", required=True
+ "--out_file", type=Path, help="Path to the quantized model: String", required=True
)
parser.add_argument(
- "--build_dir", type=Path, help="path to build directory", default=build_path
+ "--build_dir", type=Path, help="Path to the build file: String", default=build_path
)
parser.add_argument(
"--config",
type=Path,
- help="path to the configuration file (default: )",
+ help="Path to the configuration file: String (default: \"\")",
default="",
)
parser.add_argument(
- "--nthread", type=int, help="number of threads to use (default: 1)", default=1
+ "--nthread", type=int, help="Number of threads to use: Int (default: 1)", default=1
)
parser.add_argument(
"--weight_dtype",
choices=["int4", "int8"],
- help="weight data type, default: int4",
+ help="Data type of quantized weight: int4/int8 (default: int4)",
default="int4",
)
parser.add_argument(
"--alg",
type=str,
- help="qquantization algorithm to use: sym/asym (default: sym)",
+ help="Quantization algorithm to use: sym/asym (default: sym)",
default="sym",
)
parser.add_argument(
- "--group_size", type=int, help="group size (default: 32)", default=32
+ "--group_size", type=int, help="Group size: Int (default: 32)", default=32
)
parser.add_argument(
"--scale_dtype",
type=str,
- help="fp32/bf16 type for scales (default: fp32)",
+ help="Data type of scales: bf16/fp32 (default: fp32)",
default="fp32",
)
parser.add_argument(
"--compute_dtype",
type=str,
- help="data type of Gemm computation: int8/bf16/fp32 (default: int8)",
+ help="Data type of Gemm computation: int8/bf16/fp32 (default: int8)",
default="int8",
)
parser.add_argument(
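
A hypothetical invocation of this quantization script with the flags defined above (the script name and file paths are assumptions, not shown in the diff):

    python quantize.py --model_name llama --model_file ne_llama_f32.bin \
        --out_file ne_llama_q4.bin --weight_dtype int4 --alg sym \
        --group_size 32 --scale_dtype fp32 --compute_dtype int8 --nthread 4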
26 changes: 13 additions & 13 deletions intel_extension_for_transformers/llm/runtime/graph/scripts/run.py
@@ -45,28 +45,28 @@ def main(args_in: Optional[List[str]] = None) -> None:
parser.add_argument(
"--weight_dtype",
choices=["int4", "int8"],
- help="weight data type, default: int4",
+ help="Data type of quantized weight: int4/int8 (default: int4)",
default="int4",
)
parser.add_argument(
"--alg",
type=str,
- help="qquantization algorithm to use: sym/asym (default: sym)",
+ help="Quantization algorithm: sym/asym (default: sym)",
default="sym",
)
parser.add_argument(
- "--group_size", type=int, help="group size (default: 32)", default=32
+ "--group_size", type=int, help="Group size: Int (default: 32)", default=32
)
parser.add_argument(
"--scale_dtype",
type=str,
- help="fp32/bf16 type for scales (default: fp32)",
+ help="Data type of scales: fp32/bf16 (default: fp32)",
default="fp32",
)
parser.add_argument(
"--compute_dtype",
type=str,
- help="data type of Gemm computation: int8/bf16/fp32 (default: int8)",
+ help="Data type of Gemm computation: int8/bf16/fp32 (default: int8)",
default="int8",
)
parser.add_argument(
@@ -79,48 +79,48 @@ def main(args_in: Optional[List[str]] = None) -> None:
"-p",
"--prompt",
type=str,
- help="prompt to start generation with (default: empty)",
+ help="Prompt to start generation with: String (default: empty)",
default="Once upon a time, there existed a ",
)
parser.add_argument(
"-n",
"--n_predict",
type=int,
- help="number of tokens to predict (default: -1, -1 = infinity)",
+ help="Number of tokens to predict: Int (default: -1, -1 = infinity)",
default=-1,
)
parser.add_argument(
"-t",
"--threads",
type=int,
- help="number of threads to use during computation (default: 56)",
+ help="Number of threads to use during computation: Int (default: 56)",
default=56,
)
parser.add_argument(
"-b",
"--batch_size_truncate",
type=int,
- help="batch size for prompt processing (default: 512)",
+ help="Batch size for prompt processing: Int (default: 512)",
default=512,
)
parser.add_argument(
"-c",
"--ctx_size",
type=int,
- help="size of the prompt context (default: 512)",
+ help="Size of the prompt context: Int (default: 512, cannot be larger than the specific model's context window length)",
default=512,
)
parser.add_argument(
"-s",
"--seed",
type=int,
- help="NG seed (default: -1, use random seed for < 0)",
+ help="NG seed: Int (default: -1, use random seed for < 0)",
default=-1,
)
parser.add_argument(
"--repeat_penalty",
type=float,
- help="penalize repeat sequence of tokens (default: 1.1, 1.0 = disabled)",
+ help="Penalize repeat sequence of tokens: Float (default: 1.1, 1.0 = disabled)",
default=1.1,
)
parser.add_argument(
@@ -131,7 +131,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
parser.add_argument(
"--keep",
type=int,
- help="number of tokens to keep from the initial prompt (default: 0, -1 = all) ",
+ help="Number of tokens to keep from the initial prompt: Int (default: 0, -1 = all)",
default=0,
)

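
A hypothetical one-shot invocation of scripts/run.py using only flags visible in this hunk; the model-selection arguments are defined outside the hunk and omitted here:

    python scripts/run.py --weight_dtype int4 --alg sym --group_size 32 \
        -p "Once upon a time, there existed a " -n 32 -t 8 -c 512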
@@ -274,3 +274,6 @@ class AutoModel(_BaseQBitsAutoModelClass):

class AutoModelForSeq2SeqLM(_BaseQBitsAutoModelClass):
ORIG_MODEL = transformers.AutoModelForSeq2SeqLM

+ class GPTBigCodeForCausalLM(_BaseQBitsAutoModelClass):
+     ORIG_MODEL = transformers.GPTBigCodeForCausalLM
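
The new wrapper follows the same pattern as the classes above: each subclass only rebinds ORIG_MODEL, and the shared base class dispatches loading to it. A minimal sketch of that delegation pattern (the real _BaseQBitsAutoModelClass adds quantization handling not shown in this diff):

    import transformers

    class _BaseAutoModelSketch:
        ORIG_MODEL = None  # rebound by each concrete wrapper

        @classmethod
        def from_pretrained(cls, model_name_or_path, **kwargs):
            # Delegate loading to the wrapped transformers class.
            return cls.ORIG_MODEL.from_pretrained(model_name_or_path, **kwargs)

    class GPTBigCodeForCausalLM(_BaseAutoModelSketch):
        ORIG_MODEL = transformers.GPTBigCodeForCausalLM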
