From 73ab4d0b76b92426ae73cb74018f2662cdefd062 Mon Sep 17 00:00:00 2001 From: Lutz Roeder Date: Sat, 18 Jan 2025 13:51:13 -0800 Subject: [PATCH] Update executorch.js (#1175) --- source/executorch.js | 104 ++++++++++++++-------- source/python.js | 2 +- source/pytorch-metadata.json | 168 +++++++++++++++++++++++++++++++++++ tools/pytorch_script.py | 38 ++++++++ 4 files changed, 276 insertions(+), 36 deletions(-) diff --git a/source/executorch.js b/source/executorch.js index acafd213f2..b6f13f9188 100644 --- a/source/executorch.js +++ b/source/executorch.js @@ -49,10 +49,14 @@ executorch.Graph = class { this.outputs = []; this.nodes = []; const values = new Map(); - values.map = (arg) => { - if (!values.has(arg)) { - const v = plan.values[arg].val; - if (v instanceof executorch.schema.Tensor || v instanceof executorch.schema.TensorList) { + values.map = (index, output) => { + if (!values.has(index)) { + const v = plan.values[index].val; + const tensor = v instanceof executorch.schema.Tensor || v instanceof executorch.schema.TensorList; + if (output && !tensor) { + const value = [new executorch.Value(index.toString(), null, null)]; + values.set(index, { type: null, value }); + } else if (tensor) { const tensors = v instanceof executorch.schema.Tensor ? [v] : Array.from(v.items).map((arg) => plan.values[arg].val); const list = []; for (let i = 0; i < tensors.length; i++) { @@ -62,26 +66,27 @@ executorch.Graph = class { if (v.data_buffer_idx > 0) { initializer = new executorch.Tensor(tensor); } - const identifier = tensors.length > 1 ? `${arg}.${i}` : arg.toString(); - list.push(new executorch.Value(identifier, type, initializer)); + const identifier = tensors.length > 1 ? `${index}.${i}` : index.toString(); + const value = new executorch.Value(identifier, type, initializer); + list.push(value); } - values.set(arg, { type: null, value: list }); + values.set(index, { type: null, value: list }); } else if (v instanceof executorch.schema.Bool) { - values.set(arg, { type: 'int64', value: v.bool_val }); + values.set(index, { type: 'int64', value: v.bool_val }); } else if (v instanceof executorch.schema.Int) { - values.set(arg, { type: 'int64', value: v.int_val }); + values.set(index, { type: 'int64', value: v.int_val }); } else if (v instanceof executorch.schema.IntList) { const list = v.items.map((index) => plan.values[index].val.int_val); - values.set(arg, { type: 'int64[]', value: list }); + values.set(index, { type: 'int64[]', value: list }); } else if (v instanceof executorch.schema.Double) { - values.set(arg, { type: 'float64', value: v.double_val }); + values.set(index, { type: 'float64', value: v.double_val }); } else if (v instanceof executorch.schema.Null) { - values.set(arg, { type: 'attribute', value: null }); + values.set(index, { type: 'attribute', value: null }); } else { throw new Error('Value type not implemented.'); } } - return values.get(arg); + return values.get(index); }; for (const input of plan.inputs) { const value = values.map(input); @@ -128,6 +133,7 @@ executorch.Node = class { this.name = ''; this.inputs = []; this.outputs = []; + this.attributes = []; const instr_args = instruction.instr_args; if (instr_args instanceof executorch.schema.KernelCall) { const op = plan.operators[instr_args.op_index]; @@ -135,6 +141,9 @@ executorch.Node = class { const identifier = op.overload ? 
`${op.name}.${op.overload}` : op.name;
             const schemas = execution.invoke('torch._C._jit_get_schemas_for_operator', [op.name]);
             const schema = schemas.find((schema) => schema.name === op.name && schema.overload_name === op.overload);
+            if (!schema) {
+                throw new executorch.Error(`Operator schema for '${identifier}' not found.`);
+            }
             const category = schema && schema.category ? schema.category : '';
             const alias = (arg) => arg && arg.alias_info && arg.alias_info.before_set.length === 1 ? arg.alias_info.before_set[0] : null;
             const outputs = new Set(schema && Array.isArray(schema.returns) ? schema.returns.map((arg) => alias(arg)).filter((alias) => alias !== null) : []);
@@ -143,33 +152,53 @@
             let i = 0;
             const args = instr_args.args;
             for (; i < schema.arguments.length; i++) {
-                const v = args[i];
+                const index = args[i];
                 const arg = schema && i < schema.arguments.length ? schema.arguments[i] : null;
                 const output = arg ? alias(schema.arguments[i]) : null;
                 if (output && outputs.has(output)) {
-                    inputs.set(output, v);
+                    inputs.set(output, index);
                     continue;
                 }
                 const name = arg ? arg.name : i.toString();
-                const value = values.map(v);
+                const value = values.map(index);
                 const argument = new executorch.Argument(name, value.value, value.type);
                 this.inputs.push(argument);
             }
             for (let j = 0; j < schema.returns.length; j++) {
                 const ret = schema.returns[j];
                 const output = alias(ret);
-                const v = output && inputs.has(output) ? inputs.get(output) : args[i++];
+                let index = args[i++];
+                index = output && inputs.has(output) ? inputs.get(output) : index;
                 const name = ret.name;
-                const value = values.map(v);
+                const value = values.map(index, true);
                 const argument = new executorch.Argument(name || '', value.value, value.type);
                 this.outputs.push(argument);
             }
         } else if (instr_args instanceof executorch.schema.DelegateCall) {
             const delegate = plan.delegates[instr_args.delegate_index];
+            const args = instr_args.args;
             const name = delegate.id;
             this.type = { name };
+            switch (name) {
+                case 'XnnpackBackend':
+                case 'CoreMLBackend': {
+                    const input = values.map(args[0]);
+                    const output = values.map(args[1], true);
+                    this.inputs.push(new executorch.Argument('input', input.value, input.type));
+                    this.outputs.push(new executorch.Argument('output', output.value, output.type));
+                    break;
+                }
+                default: {
+                    throw new executorch.Error(`ExecuTorch delegate '${name}' not implemented.`);
+                }
+            }
+            for (const spec of delegate.compile_specs) {
+                const value = ArrayBuffer.isView(spec.value) ?
Array.from(spec.value) : spec.value;
+                const attribute = new executorch.Argument(spec.key, value);
+                this.attributes.push(attribute);
+            }
         } else {
-            throw new Error('Instruction argument not implemented.');
+            throw new executorch.Error(`Instruction type '${instr_args.constructor.name}' not implemented.`);
         }
     }
 };
@@ -177,23 +206,22 @@ executorch.Node = class {
 executorch.TensorType = class {
 
     constructor(tensor) {
-        const ScalarType = executorch.schema.ScalarType;
-        switch (tensor.scalar_type) {
-
-            case ScalarType.BOOL: this.dataType = 'boolean'; break;
-            case ScalarType.BYTE: this.dataType = 'uint8'; break;
-            case ScalarType.CHAR: this.dataType = 'int8'; break;
-            case ScalarType.SHORT: this.dataType = 'int16'; break;
-            case ScalarType.INT: this.dataType = 'int32'; break;
-            case ScalarType.LONG: this.dataType = 'int64'; break;
-            case ScalarType.HALF: this.dataType = 'float16'; break;
-            case ScalarType.FLOAT: this.dataType = 'float32'; break;
-            case ScalarType.DOUBLE: this.dataType = 'float64'; break;
-            case ScalarType.UINT16: this.dataType = 'uint16'; break;
-            case ScalarType.UINT32: this.dataType = 'uint32'; break;
-            case ScalarType.UINT64: this.dataType = 'uint64'; break;
-            default: throw new executorch.Error(`Unknown tensor data type '${tensor.scalar_type}'.`);
+        executorch.TensorType._types = executorch.TensorType._types || [
+            'uint8',
+            'int8', 'int16', 'int32', 'int64',
+            'float16', 'float32', 'float64',
+            'complex32', 'complex64', 'complex128',
+            'boolean',
+            'qint8', 'quint8', 'qint32',
+            'bfloat16',
+            'quint4x2', 'quint2x4', 'bits1x8', 'bits2x4', 'bits4x2', 'bits8', 'bits16',
+            'float8e5m2', 'float8e4m3fn', 'float8e5m2fnuz', 'float8e4m3fnuz',
+            'uint16', 'uint32', 'uint64'
+        ];
+        if (tensor.scalar_type >= executorch.TensorType._types.length) {
+            throw new executorch.Error(`Unknown tensor data type '${tensor.scalar_type}'.`);
         }
+        this.dataType = executorch.TensorType._types[tensor.scalar_type];
         this.shape = new executorch.TensorShape(Array.from(tensor.sizes));
     }
 
diff --git a/source/python.js b/source/python.js
index 43122e4b7d..22d21e5b67 100644
--- a/source/python.js
+++ b/source/python.js
@@ -18340,7 +18340,7 @@ python.Execution = class {
             torch.quint2x4 = new torch.dtype(17, 'quint2x4');
             torch.bits1x8 = new torch.dtype(18, 'bits1x8');
             torch.bits2x4 = new torch.dtype(19, 'bits2x4');
-            torch.bits2x4 = new torch.dtype(20, 'bits2x4');
+            torch.bits4x2 = new torch.dtype(20, 'bits4x2');
             torch.bits8 = new torch.dtype(21, 'bits8');
             torch.bits16 = new torch.dtype(22, 'bits16');
             torch.float8_e5m2 = new torch.dtype(23, 'float8_e5m2', 1);
diff --git a/source/pytorch-metadata.json b/source/pytorch-metadata.json
index c6c63ab853..b5f35c77b0 100755
--- a/source/pytorch-metadata.json
+++ b/source/pytorch-metadata.json
@@ -1172,6 +1172,24 @@
     {
         "name": "aten::unsqueeze_copy.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)"
     },
+    {
+        "name": "aten::squeeze_copy(Tensor self) -> Tensor"
+    },
+    {
+        "name": "aten::squeeze_copy.dim(Tensor self, int dim) -> Tensor"
+    },
+    {
+        "name": "aten::squeeze_copy.dims(Tensor self, int[] dim) -> Tensor"
+    },
+    {
+        "name": "aten::squeeze_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"
+    },
+    {
+        "name": "aten::squeeze_copy.dim_out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)"
+    },
+    {
+        "name": "aten::squeeze_copy.dims_out(Tensor self, int[] dim, *, Tensor(a!)
out) -> Tensor(a!)" + }, { "name": "aten::split_copy.Tensor(Tensor self, SymInt split_size, int dim=0) -> Tensor[]" }, @@ -1214,6 +1232,33 @@ { "name": "aten::view_as_real_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)" }, + { + "name": "aten::copy(Tensor self, Tensor src, bool non_blocking=False) -> Tensor" + }, + { + "name": "aten::copy.out(Tensor self, Tensor src, bool non_blocking=False, *, Tensor(a!) out) -> Tensor(a!)" + }, + { + "name": "aten::copy.t(t[](a) self) -> t[]" + }, + { + "name": "aten::copy.Dict_str(Dict(str, t)(a) self) -> Dict(str, t)" + }, + { + "name": "aten::copy.Dict_int(Dict(int, t)(a) self) -> Dict(int, t)" + }, + { + "name": "aten::copy.Dict_bool(Dict(bool, t)(a) self) -> Dict(bool, t)" + }, + { + "name": "aten::copy.Dict_float(Dict(float, t)(a) self) -> Dict(float, t)" + }, + { + "name": "aten::copy.Dict_complex(Dict(complex, t)(a) self) -> Dict(complex, t)" + }, + { + "name": "aten::copy.Dict_Tensor(Dict(Tensor, t)(a) self) -> Dict(Tensor, t)" + }, { "name": "aten::smooth_l1_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta, *, Tensor(a!) grad_input) -> Tensor(a!)" }, @@ -1797,6 +1842,12 @@ { "name": "aten::diag_embed.out(Tensor self, int offset=0, int dim1=-2, int dim2=-1, *, Tensor(a!) out) -> Tensor(a!)" }, + { + "name": "aten::expand_copy(Tensor self, SymInt[] size, *, bool implicit=False) -> Tensor" + }, + { + "name": "aten::expand_copy.out(Tensor self, SymInt[] size, *, bool implicit=False, Tensor(a!) out) -> Tensor(a!)" + }, { "name": "aten::view_copy(Tensor self, SymInt[] size) -> Tensor" }, @@ -2653,6 +2704,9 @@ { "name": "aten::masked_fill.Tensor_out(Tensor self, Tensor mask, Tensor value, *, Tensor(a!) out) -> Tensor(a!)" }, + { + "name": "aten::_local_scalar_dense(Tensor self) -> Scalar" + }, { "name": "aten::_unique2(Tensor self, bool sorted=True, bool return_inverse=False, bool return_counts=False) -> (Tensor, Tensor, Tensor)" }, @@ -7352,6 +7406,120 @@ { "name": "executorch_prim::et_view.default(Tensor self, int[] size) -> (Tensor out)" }, + { + "name": "executorch_prim::sub.Scalar(Scalar a, Scalar b) -> Scalar" + }, + { + "name": "executorch_prim::mul.Scalar(Scalar a, Scalar b) -> Scalar" + }, + { + "name": "executorch_prim::floordiv.Scalar(Scalar a, Scalar b) -> Scalar" + }, + { + "name": "executorch_prim::add.Scalar(Scalar a, Scalar b) -> Scalar" + }, + { + "name": "llama::sdpa_with_kv_cache.out(Tensor query, Tensor key, Tensor value, Tensor(a!) key_cache, Tensor(b!) value_cache, int start_pos, int seq_len, Tensor? attn_mask=None, float drpout_p=0.0, bool is_causal=False, float? scale=None, *, Tensor(c!) out) -> Tensor(c!)" + }, + { + "name": "llama::sdpa.out(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float drpout_p=0.0, bool is_causal=False, float? scale=None, *, Tensor(a!) out) -> Tensor(a!)" + }, + { + "name": "quantized_decomposed::embedding_byte.dtype_out(Tensor weight, Tensor weight_scales, Tensor? weight_zero_points, int weight_quant_min, int weight_quant_max, Tensor indices, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)" + }, + { + "name": "quantized_decomposed::choose_qparams.tensor(Tensor input, int quant_min, int quant_max, float eps, ScalarType dtype) -> (Tensor, Tensor)" + }, + { + "name": "quantized_decomposed::embedding_4bit(Tensor weight, Tensor weight_scales, Tensor? 
weight_zero_points, int weight_quant_min, int weight_quant_max, Tensor indices) -> Tensor" + }, + { + "name": "quantized_decomposed::embedding_4bit.dtype(Tensor weight, Tensor weight_scales, Tensor? weight_zero_points, int weight_quant_min, int weight_quant_max, Tensor indices, *, ScalarType? dtype=None) -> Tensor" + }, + { + "name": "quantized_decomposed::embedding_4bit.out(Tensor weight, Tensor weight_scales, Tensor? weight_zero_points, int weight_quant_min, int weight_quant_max, Tensor indices, *, Tensor(a!) out) -> Tensor(a!)" + }, + { + "name": "quantized_decomposed::embedding_4bit.dtype_out(Tensor weight, Tensor weight_scales, Tensor? weight_zero_points, int weight_quant_min, int weight_quant_max, Tensor indices, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)" + }, + { + "name": "quantized_decomposed::dequantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensor" + }, + { + "name": "quantized_decomposed::dequantize_per_tensor.tensor(Tensor input, Tensor scale, Tensor zero_point, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensor" + }, + { + "name": "quantized_decomposed::dequantize_per_tensor.tensor2(Tensor input, Tensor scale, Tensor zero_point, Tensor quant_min, Tensor quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensor" + }, + { + "name": "quantized_decomposed::add(Tensor a, float a_scale, int a_zero_point, int a_quant_min, int a_quant_max, Tensor b, float b_scale, int b_zero_point, int b_quant_min, int b_quant_max, float out_scale, int out_zero_point, int out_quant_min, int out_quant_max) -> Tensor qc" + }, + { + "name": "quantized_decomposed::add.scalar(Tensor qa, float a_scale, int a_zero_point, int a_quant_min, int a_quant_max, ScalarType a_dtype, Scalar b, float out_scale, int out_zero_point, int out_quant_min, int out_quant_max, ScalarType out_dtype) -> Tensor" + }, + { + "name": "quantized_decomposed::add_relu(Tensor a, float a_scale, int a_zero_point, int a_quant_min, int a_quant_max, Tensor b, float b_scale, int b_zero_point, int b_quant_min, int b_quant_max, float out_scale, int out_zero_point, int out_quant_min, int out_quant_max) -> Tensor qc" + }, + { + "name": "quantized_decomposed::dequantize_per_channel(Tensor input, Tensor scales, Tensor? zero_points, int axis, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensor" + }, + { + "name": "quantized_decomposed::fake_quant_per_channel(Tensor input, Tensor scales, Tensor zero_points, int axis, int quant_min, int quant_max) -> Tensor" + }, + { + "name": "quantized_decomposed::quantize_per_channel(Tensor input, Tensor scales, Tensor zero_points, int axis, int quant_min, int quant_max, ScalarType dtype) -> Tensor" + }, + { + "name": "quantized_decomposed::choose_qparams_symmetric.tensor(Tensor input, int quant_min, int quant_max, float eps, ScalarType dtype) -> (Tensor, Tensor)" + }, + { + "name": "quantized_decomposed::mixed_linear(Tensor input, Tensor weight, Tensor weight_scales, Tensor? weight_zero_points, ScalarType? 
dtype=None) -> Tensor" + }, + { + "name": "quantized_decomposed::dequantize_per_token(Tensor input, Tensor scales, Tensor zero_points, int quant_min, int quant_max, ScalarType dtype, ScalarType output_dtype) -> Tensor" + }, + { + "name": "quantized_decomposed::quantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> Tensor" + }, + { + "name": "quantized_decomposed::quantize_per_tensor.tensor(Tensor input, Tensor scale, Tensor zero_point, int quant_min, int quant_max, ScalarType dtype) -> Tensor" + }, + { + "name": "quantized_decomposed::quantize_per_tensor.tensor2(Tensor input, Tensor scale, Tensor zero_point, Tensor quant_min, Tensor quant_max, ScalarType dtype) -> Tensor" + }, + { + "name": "quantized_decomposed::choose_qparams_per_token_asymmetric(Tensor input, ScalarType dtype) -> (Tensor, Tensor)" + }, + { + "name": "quantized_decomposed::choose_qparams_per_token(Tensor input, ScalarType dtype) -> (Tensor, Tensor)" + }, + { + "name": "quantized_decomposed::quantize_per_channel_group(Tensor input, Tensor scales, Tensor zero_points, int quant_min, int quant_max, ScalarType dtype, int group_size) -> Tensor" + }, + { + "name": "quantized_decomposed::dequantize_per_channel_group(Tensor input, Tensor scales, Tensor? zero_points, int quant_min, int quant_max, ScalarType dtype, int group_size, ScalarType output_dtype) -> Tensor" + }, + { + "name": "quantized_decomposed::embedding_byte(Tensor weight, Tensor weight_scales, Tensor? weight_zero_points, int weight_quant_min, int weight_quant_max, Tensor indices) -> Tensor" + }, + { + "name": "quantized_decomposed::embedding_byte.dtype(Tensor weight, Tensor weight_scales, Tensor? weight_zero_points, int weight_quant_min, int weight_quant_max, Tensor indices, *, ScalarType? dtype=None) -> Tensor" + }, + { + "name": "quantized_decomposed::embedding_byte.out(Tensor weight, Tensor weight_scales, Tensor? weight_zero_points, int weight_quant_min, int weight_quant_max, Tensor indices, *, Tensor(a!) out) -> Tensor(a!)" + }, + { + "name": "quantized_decomposed::mixed_mm(Tensor input, Tensor weight, Tensor weight_scales, Tensor? weight_zero_points) -> Tensor" + }, + { + "name": "quantized_decomposed::_choose_qparams_per_token_asymmetric_impl(Tensor input, ScalarType dtype) -> (Tensor, Tensor)" + }, + { + "name": "quantized_decomposed::quantize_per_token(Tensor input, Tensor scales, Tensor zero_points, int quant_min, int quant_max, ScalarType dtype) -> Tensor" + }, + { + "name": "llama::fast_hadamard_transform.out(Tensor mat, *, Tensor(a!) 
out) -> Tensor(a!)" + }, { "name": "__torch__.torch.classes.rnn.CellParamsBase", "inputs": [ diff --git a/tools/pytorch_script.py b/tools/pytorch_script.py index a48e96ee9a..7391a1d99d 100644 --- a/tools/pytorch_script.py +++ b/tools/pytorch_script.py @@ -59,6 +59,10 @@ def _write_metadata(metadata): 'aten::fft(Tensor self, int signal_ndim, bool normalized=False) -> Tensor', 'aten::grid_sampler.legacy(Tensor input, Tensor grid, int interpolation_mode, int padding_mode) -> Tensor', 'executorch_prim::et_view.default(Tensor self, int[] size) -> (Tensor out)', + 'executorch_prim::add.Scalar(Scalar a, Scalar b) -> Scalar', + 'executorch_prim::sub.Scalar(Scalar a, Scalar b) -> Scalar', + 'executorch_prim::mul.Scalar(Scalar a, Scalar b) -> Scalar', + 'executorch_prim::floordiv.Scalar(Scalar a, Scalar b) -> Scalar', 'neuron::_execute_neuron(__torch__.torch.classes.neuron.Model _0, Tensor[] _1) -> Tensor[] _0', 'neuron::_from_neuron(Tensor _0) -> Tensor _0', 'neuron::_init_neuron() -> ()', @@ -211,9 +215,43 @@ def _write_metadata(metadata): 'horizon::scale_quanti(Tensor x, Tensor scale, Tensor zero_point, int d, int min, int max, bool flag1, bool flat2, str str1, str str2) -> Tensor', 'prim::isinstance(Any to_check) -> bool', 'prim::shape(Tensor self) -> int[]', + 'llama::fast_hadamard_transform.out(Tensor mat, *, Tensor(a!) out) -> Tensor(a!)', + 'llama::sdpa_with_kv_cache.out(Tensor query, Tensor key, Tensor value, Tensor(a!) key_cache, Tensor(b!) value_cache, int start_pos, int seq_len, Tensor? attn_mask=None, float drpout_p=0.0, bool is_causal=False, float? scale=None, *, Tensor(c!) out) -> Tensor(c!)', + 'llama::sdpa.out(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float drpout_p=0.0, bool is_causal=False, float? scale=None, *, Tensor(a!) out) -> Tensor(a!)', 'quantized_decomposed::quantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)', 'quantized_decomposed::dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None, Tensor(a!) out) -> Tensor(a!)', 'quantized_decomposed::dequantize_per_tensor.Tensor_out(Tensor input, Tensor scale, Tensor zero_point, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None, Tensor(a!) out) -> Tensor(a!)', + 'quantized_decomposed::choose_qparams.tensor(Tensor input, int quant_min, int quant_max, float eps, ScalarType dtype) -> (Tensor, Tensor)', + 'quantized_decomposed::embedding_4bit(Tensor weight, Tensor weight_scales, Tensor? weight_zero_points, int weight_quant_min, int weight_quant_max, Tensor indices) -> Tensor', + 'quantized_decomposed::embedding_4bit.dtype(Tensor weight, Tensor weight_scales, Tensor? weight_zero_points, int weight_quant_min, int weight_quant_max, Tensor indices, *, ScalarType? dtype=None) -> Tensor', + 'quantized_decomposed::embedding_4bit.out(Tensor weight, Tensor weight_scales, Tensor? weight_zero_points, int weight_quant_min, int weight_quant_max, Tensor indices, *, Tensor(a!) out) -> Tensor(a!)', + 'quantized_decomposed::embedding_4bit.dtype_out(Tensor weight, Tensor weight_scales, Tensor? weight_zero_points, int weight_quant_min, int weight_quant_max, Tensor indices, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)', + 'quantized_decomposed::dequantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? 
out_dtype=None) -> Tensor', + 'quantized_decomposed::dequantize_per_tensor.tensor(Tensor input, Tensor scale, Tensor zero_point, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensor', + 'quantized_decomposed::dequantize_per_tensor.tensor2(Tensor input, Tensor scale, Tensor zero_point, Tensor quant_min, Tensor quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensor', + 'quantized_decomposed::add(Tensor a, float a_scale, int a_zero_point, int a_quant_min, int a_quant_max, Tensor b, float b_scale, int b_zero_point, int b_quant_min, int b_quant_max, float out_scale, int out_zero_point, int out_quant_min, int out_quant_max) -> Tensor qc', + 'quantized_decomposed::add.scalar(Tensor qa, float a_scale, int a_zero_point, int a_quant_min, int a_quant_max, ScalarType a_dtype, Scalar b, float out_scale, int out_zero_point, int out_quant_min, int out_quant_max, ScalarType out_dtype) -> Tensor', + 'quantized_decomposed::add_relu(Tensor a, float a_scale, int a_zero_point, int a_quant_min, int a_quant_max, Tensor b, float b_scale, int b_zero_point, int b_quant_min, int b_quant_max, float out_scale, int out_zero_point, int out_quant_min, int out_quant_max) -> Tensor qc', + 'quantized_decomposed::dequantize_per_channel(Tensor input, Tensor scales, Tensor? zero_points, int axis, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensor', + 'quantized_decomposed::fake_quant_per_channel(Tensor input, Tensor scales, Tensor zero_points, int axis, int quant_min, int quant_max) -> Tensor', + 'quantized_decomposed::quantize_per_channel(Tensor input, Tensor scales, Tensor zero_points, int axis, int quant_min, int quant_max, ScalarType dtype) -> Tensor', + 'quantized_decomposed::choose_qparams_symmetric.tensor(Tensor input, int quant_min, int quant_max, float eps, ScalarType dtype) -> (Tensor, Tensor)', + 'quantized_decomposed::mixed_linear(Tensor input, Tensor weight, Tensor weight_scales, Tensor? weight_zero_points, ScalarType? dtype=None) -> Tensor', + 'quantized_decomposed::dequantize_per_token(Tensor input, Tensor scales, Tensor zero_points, int quant_min, int quant_max, ScalarType dtype, ScalarType output_dtype) -> Tensor', + 'quantized_decomposed::quantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> Tensor', + 'quantized_decomposed::quantize_per_tensor.tensor(Tensor input, Tensor scale, Tensor zero_point, int quant_min, int quant_max, ScalarType dtype) -> Tensor', + 'quantized_decomposed::quantize_per_tensor.tensor2(Tensor input, Tensor scale, Tensor zero_point, Tensor quant_min, Tensor quant_max, ScalarType dtype) -> Tensor', + 'quantized_decomposed::choose_qparams_per_token_asymmetric(Tensor input, ScalarType dtype) -> (Tensor, Tensor)', + 'quantized_decomposed::choose_qparams_per_token(Tensor input, ScalarType dtype) -> (Tensor, Tensor)', + 'quantized_decomposed::quantize_per_channel_group(Tensor input, Tensor scales, Tensor zero_points, int quant_min, int quant_max, ScalarType dtype, int group_size) -> Tensor', + 'quantized_decomposed::dequantize_per_channel_group(Tensor input, Tensor scales, Tensor? zero_points, int quant_min, int quant_max, ScalarType dtype, int group_size, ScalarType output_dtype) -> Tensor', + 'quantized_decomposed::embedding_byte(Tensor weight, Tensor weight_scales, Tensor? 
weight_zero_points, int weight_quant_min, int weight_quant_max, Tensor indices) -> Tensor', + 'quantized_decomposed::embedding_byte.dtype(Tensor weight, Tensor weight_scales, Tensor? weight_zero_points, int weight_quant_min, int weight_quant_max, Tensor indices, *, ScalarType? dtype=None) -> Tensor', + 'quantized_decomposed::embedding_byte.out(Tensor weight, Tensor weight_scales, Tensor? weight_zero_points, int weight_quant_min, int weight_quant_max, Tensor indices, *, Tensor(a!) out) -> Tensor(a!)', + 'quantized_decomposed::embedding_byte.dtype_out(Tensor weight, Tensor weight_scales, Tensor? weight_zero_points, int weight_quant_min, int weight_quant_max, Tensor indices, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)', + 'quantized_decomposed::mixed_mm(Tensor input, Tensor weight, Tensor weight_scales, Tensor? weight_zero_points) -> Tensor', + 'quantized_decomposed::_choose_qparams_per_token_asymmetric_impl(Tensor input, ScalarType dtype) -> (Tensor, Tensor)', + 'quantized_decomposed::quantize_per_token(Tensor input, Tensor scales, Tensor zero_points, int quant_min, int quant_max, ScalarType dtype) -> Tensor', 'torch_sparse::hgt_sample(Dict(str, Tensor) _0, Dict(str, Tensor) _1, Dict(str, Tensor) _2, Dict(str, int[]) _3, int _4) -> (Dict(str, Tensor) _0, Dict(str, Tensor) _1, Dict(str, Tensor) _2, Dict(str, Tensor) _3)', 'torch_sparse::cuda_version() -> int _0', 'torch_sparse::random_walk(Tensor _0, Tensor _1, Tensor _2, int _3) -> Tensor _0',
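
Reviewer note: after this change, values.map(index, output) is the single path for resolving an EValue index to a { type, value } pair, cached in the values map; passing output as true makes a non-tensor output materialize as a named placeholder instead of throwing. A minimal standalone sketch of that caching contract, with a simplified stand-in for the FlatBuffers-generated plan (the Tensor marker class and sample data below are illustrative, not part of the patch):

    // Stand-ins for the generated executorch.schema classes and the plan.
    class Tensor {}
    const plan = { values: [{ val: new Tensor() }, { val: {} }] };
    const values = new Map();
    values.map = (index, output) => {
        if (!values.has(index)) {
            const v = plan.values[index].val;
            if (v instanceof Tensor) {
                values.set(index, { type: null, value: [`tensor:${index}`] });
            } else if (output) {
                // Non-tensor outputs still get a placeholder value keyed by index.
                values.set(index, { type: null, value: [`value:${index}`] });
            } else {
                throw new Error('Value type not implemented.');
            }
        }
        return values.get(index);
    };
    values.map(0);       // { type: null, value: ['tensor:0'] } - cached on first use
    values.map(1, true); // { type: null, value: ['value:1'] } - placeholder output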
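
Reviewer note: the rewritten executorch.TensorType maps the FlatBuffers scalar_type ordinal straight into a lookup table whose order follows the torch.dtype numbering in source/python.js (0 = BYTE through 29 = UINT64). A standalone sketch of the lookup; the dataTypeOf helper name is illustrative, not part of the patch:

    // Table indexed by ScalarType ordinal; order must match the schema enum.
    const TYPES = [
        'uint8',
        'int8', 'int16', 'int32', 'int64',
        'float16', 'float32', 'float64',
        'complex32', 'complex64', 'complex128',
        'boolean',
        'qint8', 'quint8', 'qint32',
        'bfloat16',
        'quint4x2', 'quint2x4', 'bits1x8', 'bits2x4', 'bits4x2', 'bits8', 'bits16',
        'float8e5m2', 'float8e4m3fn', 'float8e5m2fnuz', 'float8e4m3fnuz',
        'uint16', 'uint32', 'uint64'
    ];
    const dataTypeOf = (scalarType) => {
        if (scalarType >= TYPES.length) {
            throw new Error(`Unknown tensor data type '${scalarType}'.`);
        }
        return TYPES[scalarType];
    };
    dataTypeOf(6);  // 'float32' (ScalarType.FLOAT)
    dataTypeOf(11); // 'boolean' (ScalarType.BOOL)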