Skip to content

Commit

Permalink
Fix: Rename image inputs
Browse files Browse the repository at this point in the history
  • Loading branch information
ashvardanian committed Apr 20, 2024
1 parent 7ac33bd commit 4f1568f
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 15 deletions.
20 changes: 7 additions & 13 deletions python/scripts/export_encoders.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"\n",
"Depending on the backend, we prefer different quantization schemes.\n",
"\n",
"- For ONNX we use `int8` quantization.\n",
"- For ONNX we use `uint8` quantization.\n",
"- For PyTorch we use `bfloat16` quantization.\n",
"- For CoreML we use `float32` representation."
]
Expand All @@ -19,6 +19,7 @@
"metadata": {},
"outputs": [],
"source": [
"!pip uninstall -y uform\n",
"!pip install --upgrade \"uform[torch]\" coremltools"
]
},
Expand All @@ -42,7 +43,7 @@
"import uform\n",
"from PIL import Image\n",
"\n",
"model, processor = uform.get_model('unum-cloud/uform-vl-english-small')\n",
"model, processor = uform.get_model('unum-cloud/' + model_name)\n",
"text = 'a small red panda in a zoo'\n",
"image = Image.open('../../assets/unum.png')\n",
"\n",
Expand Down Expand Up @@ -122,7 +123,7 @@
"CoreML Tools provides a way to convert ONNX models to CoreML models. This script demonstrates how to convert an ONNX model to a CoreML model. For that, we need to provide an example input, and the tensor shapes will be inferred from that.\n",
"\n",
"```python\n",
" image_input = ct.TensorType(name=\"input\", shape=image_data.shape)\n",
" image_input = ct.TensorType(name=\"images\", shape=image_data.shape)\n",
" text_input = ct.TensorType(name=\"input_ids\", shape=text_data[\"input_ids\"].shape)\n",
" text_attention_input = ct.TensorType(name=\"attention_mask\", shape=text_data[\"attention_mask\"].shape)\n",
"```\n",
Expand Down Expand Up @@ -155,7 +156,7 @@
"metadata": {},
"outputs": [],
"source": [
"image_input = ct.TensorType(name=\"input\", shape=generalize_first_dimensions(image_data.shape, 1))\n",
"image_input = ct.TensorType(name=\"images\", shape=generalize_first_dimensions(image_data.shape, 1))\n",
"text_input = ct.TensorType(name=\"input_ids\", shape=generalize_first_dimensions(text_data[\"input_ids\"].shape, 1))\n",
"text_attention_input = ct.TensorType(name=\"attention_mask\", shape=generalize_first_dimensions(text_data[\"attention_mask\"].shape, 1))\n",
"text_features = ct.TensorType(name=\"features\")\n",
Expand Down Expand Up @@ -403,10 +404,10 @@
" export_params=True,\n",
" opset_version=15,\n",
" do_constant_folding=True,\n",
" input_names = ['input'], \n",
" input_names = ['images'], \n",
" output_names = ['features', 'embeddings'],\n",
" dynamic_axes={\n",
" 'input' : {0 : 'batch_size'},\n",
" 'images' : {0 : 'batch_size'},\n",
" 'features' : {0 : 'batch_size'},\n",
" 'embeddings' : {0 : 'batch_size'}})"
]
Expand Down Expand Up @@ -632,13 +633,6 @@
"!huggingface-cli upload unum-cloud/uform3-image-text-english-small ../../image_encoder.pt image_encoder.pt\n",
"!huggingface-cli upload unum-cloud/uform3-image-text-english-small ../../text_encoder.pt text_encoder.pt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
4 changes: 2 additions & 2 deletions swift/Encoders.swift
Original file line number Diff line number Diff line change
Expand Up @@ -402,14 +402,14 @@ class ImageInput: MLFeatureProvider {
}

var featureNames: Set<String> {
return Set(["input"])
return Set(["images"])
}

// Vends the precomputed image feature for the "images" feature name;
// any other requested feature name returns `nil`.
func featureValue(for featureName: String) -> MLFeatureValue? {
switch featureName {
case "input":
case "images":
return precomputedFeature
default:
return nil
Expand Down

0 comments on commit 4f1568f

Please sign in to comment.