From b4267c350dca94909911046b4b8a7f4ae90649ae Mon Sep 17 00:00:00 2001 From: Gavin Uberti Date: Wed, 6 Apr 2022 14:52:32 -0400 Subject: [PATCH 01/14] First draft of micro train tutorial --- .../how_to/work_with_microtvm/micro_train.py | 538 ++++++++++++++++++ 1 file changed, 538 insertions(+) create mode 100644 gallery/how_to/work_with_microtvm/micro_train.py diff --git a/gallery/how_to/work_with_microtvm/micro_train.py b/gallery/how_to/work_with_microtvm/micro_train.py new file mode 100644 index 000000000000..5b613dd9fb77 --- /dev/null +++ b/gallery/how_to/work_with_microtvm/micro_train.py @@ -0,0 +1,538 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +.. _microTVM-with-TFLite: + +Training Vision Models for microTVM +=========================== +**Author**: `Gavin Uberti `_ + +This tutorial shows how MobileNetV1 models can be trained +to fit on embedded devices, and how those models can be +deployed to Arduino using TVM. +""" + +###################################################################### +# .. note:: +# # This tutorial is best viewed as a Jupyter Notebook. You can download and run it locally +# # using the link at the bottom of this page, or open it online for free using Google Colab. +# +# Motivation +# ----- +# When building IOT devices, we often want them to **see and understand** the world around them. +# This can take many forms, but often times a device will want to know if a certain **kind of +# object** is in its field of vision. For example: +# * A security camera might look for **people**, so it can decide whether to save a video to memory. +# * A traffic light might look for **cars**, so it can judge which lights should change first. +# * A forest camera might want to look for a **kind of animal**, so they can estimate how large the animal population is. +# To make these devices affordable, we would like them to need only a low-cost processor like the +# `nRF52840` (costing $5 each on Mouser) or the `RP2040` (just $1.45 each!). +# +# These devices have very little memory (~250 KB RAM), meaning that no conventional edge AI +# vision model (like MobileNet or EfficientNet) will be able to run. In this tutorial, we will +# show how these models can be modified to work around this requirement. Then, we will use TVM +# to compile and deploy it for an Arduino that uses one of these processors. +# +# Installing the Prerequisites +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# To run this tutorial, we will need Tensorflow and TFLite to train our model, pyserial and tlcpack +# (a community build of TVM) to compile and test it, and imagemagick and curl to preprocess data. +# We will also need to install the Arduino CLI and the mbed_nano package to test our model. +# .. 
code-block:: bash +# pip install -q tensorflow tflite pyserial +# pip install -q tlcpack-nightly -f https://tlcpack.ai/wheels +# apt-get -qq install imagemagick curl +# +# # Install Arduino CLI and library for Nano 33 BLE +# curl -fsSL https://raw.githubusercontent.com/arduino/arduino-cli/master/install.sh | sh +# /content/bin/arduino-cli core update-index +# /content/bin/arduino-cli core install arduino:mbed_nano +# +# Using the GPU +# ^^^^^^^^^^^^^ +# +# This tutorial demonstrates training a neural network, which is requires a lot of computing power +# and will go much faster if you have a GPU. If you are viewing this tutorial on Google Colab, you +# can enable a GPU by going to **Runtime->Change runtime type** and selecting "GPU" as our hardware +# accelerator. If you are running locally, you can `follow Tensorflow's guide` instead. +# +# We can test our GPU installation with the following code: + +import tensorflow as tf +if not tf.test.gpu_device_name(): + print("No GPU was detected!") + print("Model training will take much longer (~30 minutes instead of ~5)") +else: + print("GPU detected - you're good to go.") + +###################################################################### +# Downloading the Data +# ----- +# Convolutional neural networks usually learn by looking at many images, along with labels telling +# the network what those images are. To get these images, we'll need a publicly available dataset +# with thousands of images of all sorts of objects and labels of what's in each image. We'll also +# need a bunch of images that **aren't** of cars, as we're trying to distinguish these two classes. +# +# In this tutorial, we'll create a model to detect if an image contains a **car**, but you can use +# whatever category you like! Just change the source URL below to one containing images of another +# type of object. +# +# To get our car images, we'll be downloading the `Stanford Cars dataset` , +# which contains 16,185 full color images of cars. We'll also need images of random things that +# aren't cars, so we'll use the `COCO 2014` validation set (it's +# smaller, and thus faster to download than the full training set. Training on the full data set +# would yield better results). Note that there are some cars in the COCO 2017 data set, but it's +# a small enough fraction not to matter - just keep in mind that this will drive down our percieved +# accuracy slightly. +# +# We could use the Tensorflow dataloader utilities, but we'll instead do it manually to make sure +# it's easy to change the datasets being used. We'll end up with the following file hierarchy: +# +# .. code-block:: +# +# /root +# ├── images +# │ ├── object +# │ │ ├── 000001.jpg +# │ │ │ ... +# │ │ └── 016185.jpg +# │ ├── object.tgz +# │ ├── random +# │ │ ├── 000000000139.jpg +# │ │ │ ... +# │ │ └── 000000581781.jpg +# │ └── random.zip +# +# We should also note that Stanford cars has 16k images, while the COCO 2017 validation set is 5k +# images - it is not a 50/50 split! If we wanted to, we could weight these classes differently +# during training to correct for this, but training will still work if we ignore it. It should +# take about **2 minutes** to download the Stanford Cars, while COCO 2017 validation will take +# **1 minute**. + +# .. 
code-block:: bash +# +# # Download and extract our car images +# mkdir -p /root/images/object/ +# curl "http://ai.stanford.edu/~jkrause/car196/car_ims.tgz" -o ~/images/object.tgz +# tar -xf ~/images/object.tgz --strip-components 1 -C ~/images/object# +# +# # Download and extract other images +# mkdir -p /root/images/random/ +# curl "http://images.cocodataset.org/zips/val2017.zip" -o ~/images/random.zip +# unzip -jqo ~/images/random.zip -d ~/images/random + + +###################################################################### +# Loading the Data +# ----- +# Currently, our data is stored on-disk as JPG files of various sizes. To train with it, we'll have +# to load the images into memory, resize them to be 64x64, and convert them to raw, uncompressed +# data. Keras's `image_dataset_from_directory` will take care of most of this, though it loads +# images such that each pixel value is a float from 0 to 255. +# +# We'll also need to load labels, though Keras will help with this. From our subdirectory structure, +# it knows the images in `/objects` are one class, and those in `/random` another. Setting +# `label_mode='categorical'` tells Keras to convert these into **categorical labels** - a 2x1 vector +# that's either `[1, 0]` for an object of our target class, or `[0, 1]` vector for anything else. +# We'll also set `shuffle=True` to randomize the order of our examples. + +# Lastly, in machine learning we generally want our inputs to be small numbers. We'll thus use a +# `Rescaling` layer to change our images such that each pixel is a float between `0.0` and `1.0`, +# instead of `0` to `255`. We need to be careful not to rescale our categorical labels though, so +# we'll use a `lambda` function. + +import tensorflow as tf + +unscaled_dataset = tf.keras.utils.image_dataset_from_directory( + "/root/images", + batch_size=None, + shuffle=True, + label_mode='categorical', + image_size=(96, 96), +) +rescale = tf.keras.layers.Rescaling(scale=1.0/255) +full_dataset = unscaled_dataset.map(lambda im, lbl: (rescale(im), lbl)) + +###################################################################### +# What's Inside Our Dataset? +# ^^^^^^^^^^^^^^^^^^^^^^^^^^ +# Before giving this data set to our neural network, we ought to give it a quick visual inspection. +# Does the data look properly transformed? Do the labels seem appropriate? And what's our ratio of +# objects to other stuff? We can display some examples from our datasets using `matplotlib`: + +import matplotlib.pyplot as plt +from os import listdir + +print("/images/random contains %d images" % len(listdir("/root/images/random/"))) +print("/images/target contains %d images" % len(listdir("/root/images/object/"))) + +SAMPLES_TO_SHOW = 10 +plt.figure(figsize=(20, 10)) +for i, (image, label) in enumerate(unscaled_dataset): + if i >= SAMPLES_TO_SHOW: + break + ax = plt.subplot(1, SAMPLES_TO_SHOW, i + 1) + plt.imshow(image.numpy().astype("uint8")) + plt.title(list(label.numpy())) + plt.axis("off") + +###################################################################### +# What's Inside Our Dataset? +# ^^^^^^^^^^^^^^^^^^^^^^^^^^ +# While developing our model, we'll often want to check how accurate it is (e.g. to see if it +# improves during training). How do we do this? We could just train it on *all* of the data, and +# then ask it to classify that same data. However, our model could cheat by just memorizing all of +# the samples, which would make it *appear* to have very high accuracy, but perform very badly in +# reality. 
In practice, this "memorizing" is called **overfitting**. +# +# To prevent this, we will set aside some of the data (we'll use 20%) as a **validation set**. Our +# model will never be trained on validation data - we'll only use it to check our model's accuracy. +# +# We will also **batch** the data - grouping samples into clumps to make our training go faster. +# Setting `BATCH_SIZE = 32` is a decent number. + +BATCH_SIZE = 32 +num_images = len(full_dataset) +train_dataset = full_dataset.take(int(num_images * 0.8)).batch(BATCH_SIZE) +validation_dataset = full_dataset.skip(len(train_dataset)).batch(BATCH_SIZE) + +###################################################################### +# Loading the Data +# ----- +# In the past decade, `convolutional neural networks` have been widely +# adopted for image classification tasks. State-of-the-art models like `EfficientNet V2` are able +# to perform image classification better than even humans! Unfortunately, these models have tens of +# millions of parameters, and thus won't fit on cheap security camera computers. +# +# Our applications generally don't need perfect accuracy - 90% is good enough. We can thus use the +# older and smaller MobileNet V1 architecture. But this *still* won't be small enough - by default, +# MobileNet V1 with 224x224 inputs and depth 1.0 takes ~50 MB to just **store**. To reduce the size +# of the model, there are three knobs we can turn. First, we can reduce the size of the input images +# from 224x224 to 96x96 or 64x64, and Keras makes it easy to do this. We can also reduce the **depth** +# of the model, from 1.0 to 0.25. And if we were really strapped for space, we could reduce the +# number of **channels** by making our model take grayscale images instead of RGB ones. +# +# In this tutorial, we will use an RGB 64x64 input image and 0.25 depth scale. This is not quite +# ideal, but it allows the finished model to fit in 192 KB of RAM, while still letting us perform +# transfer learning using the official Tensorflow source models (if we used depth scale <0.25 or +# a grayscale input, we wouldn't be able to do this). +# +# What is Transfer Learning? +# ^^^^^^^^^^^^^^^^^^^^^^^^^^ +# Deep learning has `dominated image classification` for a long time, +# but training neural networks takes a lot of time. When a neural network is trained "from scratch", +# its parameters start out randomly initialized, forcing it to learn very slowly how to tell images +# apart. +# +# With transfer learning, we instead start with a neural network that's **already** good at a +# specific task. In this example, that task is classifying images from `the ImageNet database`. This +# means the network already has some object detection capabilities, and is likely closer to what you +# want then a random model would be. +# +# This works especially well with image processing neural networks like MobileNet. In practice, it +# turns out the convolutional layers of the model (i.e. the first 90% of the layers) are used for +# identifying low-level features like lines and shapes - only the last few fully connected layers +# are used to determine how those shapes make up the objects the network is trying to detect. +# +# We can take advantage of this by starting training with a MobileNet model that was trained on +# ImageNet, and already knows how to identify those lines and shapes. We can then just remove the +# last few layers from this pretrained model, and add our own final layers. 
We'll then train this +# conglomerate model for a few epochs on our cars vs non-cars dataset, to fine tune the first layers +# and train from scratch the last layers. +# +# Source MobileNets for transfer learning have been `pretrained by the Tensorflow folks`, so we +# can just download the one closest to what we want (the 128x128 input model with 0.25 depth scale). + +!mkdir -p /root/models +!curl "https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_2_5_128_tf.h5" \ + -o ~/models/mobilenet_2_5_128_tf.h5 +IMAGE_SIZE = (96, 96, 3) +pretrained = tf.keras.applications.MobileNet( + input_shape = IMAGE_SIZE, + weights = "/root/models/mobilenet_2_5_128_tf.h5", + alpha = 0.25 +) + +###################################################################### +# Modifying Our Network +# ^^^^^^^^^^^^^^^^^^^^^ +# As mentioned above, our pretrained model is designed to classify the 1,000 ImageNet categories, +# but we want to convert it to classify cars. Since only the bottom few layers are task-specific, +# we'll **cut off the last five layers** of our original model. In their place we'll build our own +# "tail" to the model by performing respape, dropout, flatten, and softmax operations. + +model = tf.keras.models.Sequential() + +model.add(tf.keras.layers.InputLayer(input_shape=IMAGE_SIZE)) +model.add(tf.keras.Model( + inputs=pretrained.inputs, + outputs=pretrained.layers[-5].output +)) + +model.add(tf.keras.layers.Reshape((-1,))) +model.add(tf.keras.layers.Dropout(0.1)) +model.add(tf.keras.layers.Flatten()) +model.add(tf.keras.layers.Dense(2, activation='softmax')) + +###################################################################### +# Training Our Network +# ^^^^^^^^^^^^^^^^^^^^ +# When training neural networks, we must set a parameter called the **learning rate** that controls +# how fast our network learns. It must be set carefully - too slow, and our network will take +# forever to train; too fast, and our network won't be able to learn some fine details. Generally +# for Adam (the optimizer we're using), `0.001` is a pretty good learning rate (and is what's +# recommended in the `original paper`). However, in this case +# `0.0005` seems to work a little better. +# +# We'll also pass the validation set from earlier to `model.fit`. This will evaluate how good our +# model is each time we train it, and let us track how our model is improving. Once training is +# finished, the model should have a validation accuracy around `0.98` (meaning it was right 98% of +# the time on our validation set). + +model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), + loss='categorical_crossentropy', + metrics=['accuracy']) +model.fit(train_dataset, validation_data=validation_dataset, epochs=3, verbose=2) + +###################################################################### +# Quantization +# ------------ +# We've done a decent job of reducing our model's size so far - changing the input dimension, +# along with removing the bottom layers reduced the model to just 219k parameters. However, each of +# these parameters is a `float32` that takes four bytes, so our model will take up almost one MB! +# +# Additionally, it might be the case that our hardware doesn't have built-in support for floating +# point numbers. While most high-memory Arduinos (like the Nano 33 BLE) do have hardware support, +# some others (like the Arduino Due) do not. On any boards *without* dedicated hardware support, +# floating point multiplication will be extremely slow. 
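#
# If you want to verify the "almost one MB" figure yourself, a quick back-of-the-envelope
# check is sketched below. It assumes the Keras ``model`` built above is still in scope, and
# simply multiplies the parameter count by four bytes per ``float32`` weight (ignoring any
# file-format overhead):
#
# .. code-block:: python
#
#     # Rough storage estimate for the unquantized weights
#     float32_bytes = model.count_params() * 4  # four bytes per float32 parameter
#     print("Unquantized weights: ~%.2f MB" % (float32_bytes / 2 ** 20))
#     # Quantizing to eight bit integers should shrink this by roughly 4x.
#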
+# +# To address both issues we will **quantize** the model - representing the weights as eight bit +# integers. It's more complex than just rounding, though - to get the best performance, TensorFlow +# tracks how each neuron in our model activates, so we can figure out how to best represent the +# while being relatively truthful to the original model. +# +# We will help TensorFlow do this by creating a representative dataset - a subset of the original +# that is used for tracking how those neurons activate. We'll then pass this into a `TFLiteConverter` +# (Keras itself does not have quantization support) with an `Optimize` flag to tell TFLite to perform +# the conversion. By default, TFLite keeps the inputs and outputs of our model as floats, so we must +# explicitly tell it to avoid this behavior. + +converter = tf.lite.TFLiteConverter.from_keras_model(model) +def representative_dataset(): + for input_value in full_dataset.batch(1).take(100): + yield [input_value[0]] + +converter.optimizations = [tf.lite.Optimize.DEFAULT] +converter.representative_dataset = representative_dataset +converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] +converter.inference_input_type = tf.uint8 +converter.inference_output_type = tf.uint8 + +quantized_model = converter.convert() + +###################################################################### +# Download the Model if Desired +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# We've now got a finished model, that you can use locally or in other tutorials (try autotuning +# this model or viewing it on `https://netron.app/`. If you're running this +# tutorial on Google Colab, then the code below will let you download your `.tflite` model. +# +# .. code-block:: python +# from google.colab import files +# QUANTIZED_MODEL_PATH = '/root/models/quantized.tflite' +# with open(QUANTIZED_MODEL_PATH, 'wb') as f: +# f.write(quantized_model) +# files.download(QUANTIZED_MODEL_PATH) + +###################################################################### +# Compiling With TVM For Arduino +# ------------------------------ +# Tensorflow has a built-in framework for deploying to microcontrollers - `TFLite Micro`. However, +# it's poorly supported by development boards, and does not support autotuning. We will use Apache +# TVM instead. +# +# TVM can be used either with its command line interface (`tvmc`) or with its Python interface. The +# Python interface is fully-featured and more stable, so we'll use it here. +# +# TVM is an optimizing compiler, and optimizations to our model are performed in stages via +# **intermediate representations**. The first of these is `Relay` a high-level intermediate +# representation emphasizing portability. The conversion from `.tflite` to Relay is done without any +# knowledge of our "end goal" - the fact we intend to run this model on an Arduino. +# +# Choosing an Arduino Board +# ^^^^^^^^^^^^^^^^^^^^^^^^^ +# Next, we'll have to decide exactly which Arduino board to use. The Arduino sketch that we +# ultimately generate should be compatible with any board, but knowing which board we are using in +# advance allows TVM to adjust its compilation strategy to get better performance. +# +# There is one catch - we need enough **memory** (flash and RAM) to be able to run our model. We +# won't ever be able to run a complex vision model like a MobileNet on an Arduino Uno - that board +# only has 2 kB of RAM and 32 kB of flash! Our model has ~200,000 parameters, so there is just no +# way it could fit. 
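#
# A quick way to sanity-check this is to compare the size of the quantized model against a
# board's flash. The sketch below assumes the ``quantized_model`` bytes produced earlier are
# still in scope, and uses the flash sizes quoted in this tutorial; keep in mind that the
# generated runtime code and your sketch also need room, so this is only a lower bound:
#
# .. code-block:: python
#
#     FLASH_BYTES = {"Arduino Uno": 32 * 1024, "Nano 33 BLE": 1024 * 1024}
#     model_bytes = len(quantized_model)  # size of the TFLite flatbuffer
#     for board, flash in FLASH_BYTES.items():
#         verdict = "fits" if model_bytes < flash else "does not fit"
#         print(f"{board}: {model_bytes} bytes of {flash} bytes flash -> {verdict}")
#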
+# +# For this tutorial, we will use the Nano 33 BLE, which has 1 MB of flash memory and 256 KB of RAM. +# However, any other Arduino with those specs or better should also work. +# +# Generating our project +# ^^^^^^^^^^^^^^^^^^^^^^ +# Next, we'll compile the model to TVM's MLF (machine learning format) intermediate representation, +# which consists of C/C++ code and is designed for autotuning. To improve performance, we'll tell +# TVM that we're compiling for the `nrf52840` microprocessor (the one the Nano 33 BLE uses). We'll +# also tell it to use the C runtime (abbreviated `crt`) and to use ahead-of-time memory allocation +# (abbreviated `aot`, which helps reduce the model's memory footprint). Lastly, we will disable +# vectorization with `"tir.disable_vectorize": True`, as C has no native vectorized types. +# +# Once we have set these configuration parameters, we will call `tvm.relay.build` to compile our +# Relay model into the MLF intermediate representation. From here, we just need to call +# `tvm.micro.generate_project` and pass in the Arduino template project to finish compilation. +import tflite +import tvm + +# Convert to the Relay intermediate representation +tflite_model = tflite.Model.GetRootAsModel(quantized_model, 0) +mod, params = tvm.relay.frontend.from_tflite(tflite_model) + +# Set configuration flags to improve performance +target = tvm.target.target.micro("nrf52840") +runtime = tvm.relay.backend.Runtime("crt") +executor = tvm.relay.backend.Executor("aot", {"unpacked-api": True}) + +# Convert to the MLF intermediate representation +with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): + mod = tvm.relay.build(mod, target, runtime=runtime, executor=executor, params=params) + +# Generate an Arduino project from the MLF intermediate representation +!rm -rf /root/models/project +arduino_project = tvm.micro.generate_project( + tvm.micro.get_microtvm_template_projects("arduino"), + mod, + '/root/models/project', + { + "arduino_board": "nano33ble", + "arduino_cli_cmd": "/content/bin/arduino-cli", + "project_type": "example_project", + }, +) + +###################################################################### +# Testing our Arduino Project +# --------------------------- +# Consider the following two 224x224 images from the author's camera roll - one of a car, one not. +# We will test our Arduino project by loading both of these images, and executing the compiled model +# on them both. +# +# Currently, these are 224x224 PNG images we can download from Imgur. Before we can feed in these +# images, we'll need to resize and convert them to raw data, which can be done with `imagemagick`. +# +# It's also challenging to load raw data onto an Arduino, as only C/CPP files (and similar) are +# compiled. We can work around this by embedding our raw data in a hard-coded C array with the +# built-in utility `bin2c`, that will output a file resembling the following: +# +# .. code-block:: c++ +# static const unsigned char CAR_IMAGE[] = { +# 0x22,0x23,0x14,0x22, +# ... +# 0x07,0x0e,0x08,0x08 +# }; +# +# We can do both of these things with a few lines of Bash code: +# +# .. 
code-block:: bash +# mkdir -p /root/tests +# curl "https://i.imgur.com/JBbEhxN.png" -o ~/tests/car_224.png +# convert ~/tests/car_224.png -resize 64 ~/tests/car_64.png +# stream ~/tests/car_64.png ~/tests/car.raw +# bin2c -c -st ~/tests/car.raw --name CAR_IMAGE > ~/models/project/car.c +# +# curl "https://i.imgur.com/wkh7Dx2.png" -o ~/tests/catan_224.png +# convert ~/tests/catan_224.png -resize 64 ~/tests/catan_64.png +# stream ~/tests/catan_64.png ~/tests/catan.raw +# bin2c -c -st ~/tests/catan.raw --name CATAN_IMAGE > ~/models/project/catan.c + +###################################################################### +# Writing our Arduino Script +# -------------------------- +# We now need a little bit of Arduino code to read the two binary arrays we just generated, run the +# model on them, and log the output to the serial monitor. This file will replace `arduino_sketch.ino` +# as the main file of our sketch. You'll have to copy this code in manually. +# +# .. code-block:: bash +# #include "src/model.h" +# #include "car.c" +# #include "catan.c" +# +# void setup() { +# Serial.begin(9600); +# TVMInitialize(); +# } +# +# void loop() { +# uint8_t result_data[2]; +# Serial.println("Car results:"); +# TVMExecute(const_cast(CAR_IMAGE), result_data); +# Serial.print(result_data[0]); Serial.print(", "); +# Serial.print(result_data[1]); Serial.println(); +# +# Serial.println("Other object results:"); +# TVMExecute(const_cast(CATAN_IMAGE), result_data); +# Serial.print(result_data[0]); Serial.print(", "); +# Serial.print(result_data[1]); Serial.println(); +# +# delay(1000); +# } +# +# Compiling our Code +# ^^^^^^^^^^^^^^^^^^ +# Now that our project has been generated, TVM's job is mostly done! We can still call +# `arduino_project.build()` and `arduino_project.upload()`, but these just use `arduino-cli`'s +# compile and flash commands underneath. We could also begin autotuning our model, but that's a +# subject for a different tutorial. To finish up, we'll first test that our program compiles does +# not throw any compiler errors: + +arduino_project.build() +arduino_project.upload() +print("Compilation succeeded!") + +###################################################################### +# Expected Results +# ^^^^^^^^^^^^^^^^^^ +# If all works as expected, you should see the following output on a Serial monitor: +# +# .. code-block:: +# Car results: +# 255, 0 +# Other object results: +# 0, 255 +# +# The first number represents the model's confidence that the object **is** a car, and ranges from +# 0-255. The second number represents the model's confidence that the object **is not** a car, and +# is also 0-255. These results mean the model is very sure that the first image is a car, and the +# second image is not (which is correct). Hence, our model is working! +# +# Summary +# ------- +# In this tutorial, we used transfer learning to quickly train an image recognition model to +# identify cars. We modified its input dimensions and last few layers to make it better at this, +# and to make it faster and smaller. We then quantified the model and compiled it using TVM to +# create an Arduino sketch. Lastly, we tested the model using two static images, to prove it works +# as intended. +# +# Next Steps +# ^^^^^^^^^^ +# From here, we could modify the model to read live images from the camera - we have another +# Arduino tutorial for how to do that `on GitHub`. Alternatively, we could also +# `use TVM's autotuning capabilities` to dramatically improve the model's performance. 
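#
# As a closing aside: rather than opening a serial monitor by hand, you could read the board's
# output with ``pyserial`` (installed as a prerequisite above). The sketch below is illustrative
# only - the port name is a placeholder that depends on your machine, and the baud rate matches
# the ``Serial.begin(9600)`` call in our Arduino code:
#
# .. code-block:: python
#
#     import serial
#
#     # Replace the port with your board's, e.g. "COM3" on Windows
#     with serial.Serial("/dev/ttyACM0", 9600, timeout=5) as port:
#         for _ in range(4):  # the sketch prints four result lines per loop
#             print(port.readline().decode().strip())
#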
From 4566b576a33614936279716b5b9c60eefe696679 Mon Sep 17 00:00:00 2001 From: Gavin Uberti Date: Fri, 8 Apr 2022 21:23:16 -0400 Subject: [PATCH 02/14] unit test code --- .../how_to/work_with_microtvm/micro_train.py | 173 +++++++++++------- 1 file changed, 109 insertions(+), 64 deletions(-) diff --git a/gallery/how_to/work_with_microtvm/micro_train.py b/gallery/how_to/work_with_microtvm/micro_train.py index 5b613dd9fb77..0f6a46b54baa 100644 --- a/gallery/how_to/work_with_microtvm/micro_train.py +++ b/gallery/how_to/work_with_microtvm/micro_train.py @@ -15,10 +15,10 @@ # specific language governing permissions and limitations # under the License. """ -.. _microTVM-with-TFLite: +.. _microtvm-train-arduino: Training Vision Models for microTVM -=========================== +=================================== **Author**: `Gavin Uberti `_ This tutorial shows how MobileNetV1 models can be trained @@ -28,11 +28,12 @@ ###################################################################### # .. note:: -# # This tutorial is best viewed as a Jupyter Notebook. You can download and run it locally -# # using the link at the bottom of this page, or open it online for free using Google Colab. +# +# This tutorial is best viewed as a Jupyter Notebook. You can download and run it locally +# using the link at the bottom of this page, or open it online for free using Google Colab. # # Motivation -# ----- +# ---------- # When building IOT devices, we often want them to **see and understand** the world around them. # This can take many forms, but often times a device will want to know if a certain **kind of # object** is in its field of vision. For example: @@ -52,7 +53,9 @@ # To run this tutorial, we will need Tensorflow and TFLite to train our model, pyserial and tlcpack # (a community build of TVM) to compile and test it, and imagemagick and curl to preprocess data. # We will also need to install the Arduino CLI and the mbed_nano package to test our model. +# # .. code-block:: bash +# # pip install -q tensorflow tflite pyserial # pip install -q tlcpack-nightly -f https://tlcpack.ai/wheels # apt-get -qq install imagemagick curl @@ -64,7 +67,6 @@ # # Using the GPU # ^^^^^^^^^^^^^ -# # This tutorial demonstrates training a neural network, which is requires a lot of computing power # and will go much faster if you have a GPU. If you are viewing this tutorial on Google Colab, you # can enable a GPU by going to **Runtime->Change runtime type** and selecting "GPU" as our hardware @@ -79,9 +81,21 @@ else: print("GPU detected - you're good to go.") +###################################################################### +# Choosing Our Work Dir +# ^^^^^^^^^^^^^^^^^^^^^ +# We need to pick a directory where our image datasets, trained model, and eventual Arduino sketch +# will all live. If running on Google Colab, we'll save everything in `/root` (aka `~`) but you'll +# probably want to store it elsewhere if running locally. + +FOLDER = "/root" +#.. testsetup:: +import tempfile +FOLDER = tempfile.mkdtemp() + ###################################################################### # Downloading the Data -# ----- +# -------------------- # Convolutional neural networks usually learn by looking at many images, along with labels telling # the network what those images are. To get these images, we'll need a publicly available dataset # with thousands of images of all sorts of objects and labels of what's in each image. 
We'll also @@ -122,23 +136,34 @@ # during training to correct for this, but training will still work if we ignore it. It should # take about **2 minutes** to download the Stanford Cars, while COCO 2017 validation will take # **1 minute**. - +# # .. code-block:: bash # # # Download and extract our car images -# mkdir -p /root/images/object/ -# curl "http://ai.stanford.edu/~jkrause/car196/car_ims.tgz" -o ~/images/object.tgz -# tar -xf ~/images/object.tgz --strip-components 1 -C ~/images/object# +# mkdir -p {FOLDER}/images/object/ +# curl "http://ai.stanford.edu/~jkrause/car196/car_ims.tgz" -o {FOLDER}/images/object.tgz +# tar -xf {FOLDER}/images/object.tgz --strip-components 1 -C {FOLDER}/images/object# # # # Download and extract other images -# mkdir -p /root/images/random/ -# curl "http://images.cocodataset.org/zips/val2017.zip" -o ~/images/random.zip -# unzip -jqo ~/images/random.zip -d ~/images/random - +# mkdir -p {FOLDER}/images/random/ +# curl "http://images.cocodataset.org/zips/val2017.zip" -o {FOLDER}/images/random.zip +# unzip -jqo {FOLDER}/images/random.zip -d {FOLDER}/images/random + + +#.. doctest:: +import os +os.mkdir(FOLDER + "/images") +os.mkdir(FOLDER + "/images/object") +os.mkdir(FOLDER + "/images/random") +from PIL import Image +for category in ["object", "random"]: + for i in range(48): + img = Image.new("RGB", (100, 100), (255, 255, 255)) + img.save(FOLDER + f"/images/{category}/{i:05d}.jpg", "JPEG") ###################################################################### # Loading the Data -# ----- +# ---------------- # Currently, our data is stored on-disk as JPG files of various sizes. To train with it, we'll have # to load the images into memory, resize them to be 64x64, and convert them to raw, uncompressed # data. Keras's `image_dataset_from_directory` will take care of most of this, though it loads @@ -149,7 +174,10 @@ # `label_mode='categorical'` tells Keras to convert these into **categorical labels** - a 2x1 vector # that's either `[1, 0]` for an object of our target class, or `[0, 1]` vector for anything else. # We'll also set `shuffle=True` to randomize the order of our examples. - +# +# We will also **batch** the data - grouping samples into clumps to make our training go faster. +# Setting `batch_size = 32` is a decent number. +# # Lastly, in machine learning we generally want our inputs to be small numbers. We'll thus use a # `Rescaling` layer to change our images such that each pixel is a float between `0.0` and `1.0`, # instead of `0` to `255`. 
We need to be careful not to rescale our categorical labels though, so @@ -158,8 +186,8 @@ import tensorflow as tf unscaled_dataset = tf.keras.utils.image_dataset_from_directory( - "/root/images", - batch_size=None, + FOLDER + "/images", + batch_size=32, shuffle=True, label_mode='categorical', image_size=(96, 96), @@ -177,12 +205,12 @@ import matplotlib.pyplot as plt from os import listdir -print("/images/random contains %d images" % len(listdir("/root/images/random/"))) -print("/images/target contains %d images" % len(listdir("/root/images/object/"))) +print("/images/random contains %d images" % len(listdir(FOLDER + "/images/random/"))) +print("/images/target contains %d images" % len(listdir(FOLDER + "/images/object/"))) SAMPLES_TO_SHOW = 10 plt.figure(figsize=(20, 10)) -for i, (image, label) in enumerate(unscaled_dataset): +for i, (image, label) in enumerate(unscaled_dataset.unbatch()): if i >= SAMPLES_TO_SHOW: break ax = plt.subplot(1, SAMPLES_TO_SHOW, i + 1) @@ -201,18 +229,14 @@ # # To prevent this, we will set aside some of the data (we'll use 20%) as a **validation set**. Our # model will never be trained on validation data - we'll only use it to check our model's accuracy. -# -# We will also **batch** the data - grouping samples into clumps to make our training go faster. -# Setting `BATCH_SIZE = 32` is a decent number. -BATCH_SIZE = 32 -num_images = len(full_dataset) -train_dataset = full_dataset.take(int(num_images * 0.8)).batch(BATCH_SIZE) -validation_dataset = full_dataset.skip(len(train_dataset)).batch(BATCH_SIZE) +num_batches = len(full_dataset) +train_dataset = full_dataset.take(int(num_batches * 0.8)) +validation_dataset = full_dataset.skip(len(train_dataset)) ###################################################################### # Loading the Data -# ----- +# ---------------- # In the past decade, `convolutional neural networks` have been widely # adopted for image classification tasks. State-of-the-art models like `EfficientNet V2` are able # to perform image classification better than even humans! Unfortunately, these models have tens of @@ -256,14 +280,21 @@ # # Source MobileNets for transfer learning have been `pretrained by the Tensorflow folks`, so we # can just download the one closest to what we want (the 128x128 input model with 0.25 depth scale). +# +# .. code-block:: bash +# +# mkdir -p {FOLDER}/models +# curl "https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_2_5_128_tf.h5" \ +# -o {FOLDER}/models/mobilenet_2_5_128_tf.h5 -!mkdir -p /root/models -!curl "https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_2_5_128_tf.h5" \ - -o ~/models/mobilenet_2_5_128_tf.h5 IMAGE_SIZE = (96, 96, 3) +WEIGHTS_PATH = FOLDER + "/models/mobilenet_2_5_128_tf.h5" +#.. doctest:: +os.mkdir(FOLDER + "/models") +WEIGHTS_PATH = None pretrained = tf.keras.applications.MobileNet( input_shape = IMAGE_SIZE, - weights = "/root/models/mobilenet_2_5_128_tf.h5", + weights = WEIGHTS_PATH, alpha = 0.25 ) @@ -333,8 +364,8 @@ converter = tf.lite.TFLiteConverter.from_keras_model(model) def representative_dataset(): - for input_value in full_dataset.batch(1).take(100): - yield [input_value[0]] + for image_batch, label_batch in full_dataset.take(3): + yield [image_batch] converter.optimizations = [tf.lite.Optimize.DEFAULT] converter.representative_dataset = representative_dataset @@ -352,11 +383,13 @@ def representative_dataset(): # tutorial on Google Colab, then the code below will let you download your `.tflite` model. # # .. 
code-block:: python +# # from google.colab import files # QUANTIZED_MODEL_PATH = '/root/models/quantized.tflite' # with open(QUANTIZED_MODEL_PATH, 'wb') as f: # f.write(quantized_model) # files.download(QUANTIZED_MODEL_PATH) +# ###################################################################### # Compiling With TVM For Arduino @@ -399,11 +432,17 @@ def representative_dataset(): # Once we have set these configuration parameters, we will call `tvm.relay.build` to compile our # Relay model into the MLF intermediate representation. From here, we just need to call # `tvm.micro.generate_project` and pass in the Arduino template project to finish compilation. +import shutil import tflite import tvm +# Method to load model is different in TFLite 1 vs 2 +try: # TFLite 2.1 and above + tflite_model = tflite.Model.GetRootAsModel(quantized_model, 0) +except AttributeError: # Fall back to TFLite 1.14 method + tflite_model = tflite.Model.Model.GetRootAsModel(quantized_model, 0) + # Convert to the Relay intermediate representation -tflite_model = tflite.Model.GetRootAsModel(quantized_model, 0) mod, params = tvm.relay.frontend.from_tflite(tflite_model) # Set configuration flags to improve performance @@ -416,11 +455,14 @@ def representative_dataset(): mod = tvm.relay.build(mod, target, runtime=runtime, executor=executor, params=params) # Generate an Arduino project from the MLF intermediate representation -!rm -rf /root/models/project +from unittest.mock import create_autospec, MagicMock +tvm.micro.generate_project = create_autospec(tvm.micro.generate_project, return_value=MagicMock()) + +shutil.rmtree(FOLDER + "/models/project", ignore_errors=True) arduino_project = tvm.micro.generate_project( tvm.micro.get_microtvm_template_projects("arduino"), mod, - '/root/models/project', + FOLDER + "/models/project", { "arduino_board": "nano33ble", "arduino_cli_cmd": "/content/bin/arduino-cli", @@ -442,7 +484,8 @@ def representative_dataset(): # compiled. We can work around this by embedding our raw data in a hard-coded C array with the # built-in utility `bin2c`, that will output a file resembling the following: # -# .. code-block:: c++ +# .. code-block:: c +# # static const unsigned char CAR_IMAGE[] = { # 0x22,0x23,0x14,0x22, # ... @@ -452,6 +495,7 @@ def representative_dataset(): # We can do both of these things with a few lines of Bash code: # # .. code-block:: bash +# # mkdir -p /root/tests # curl "https://i.imgur.com/JBbEhxN.png" -o ~/tests/car_224.png # convert ~/tests/car_224.png -resize 64 ~/tests/car_64.png @@ -471,29 +515,30 @@ def representative_dataset(): # as the main file of our sketch. You'll have to copy this code in manually. # # .. 
code-block:: bash -# #include "src/model.h" -# #include "car.c" -# #include "catan.c" -# -# void setup() { -# Serial.begin(9600); -# TVMInitialize(); -# } -# -# void loop() { -# uint8_t result_data[2]; -# Serial.println("Car results:"); -# TVMExecute(const_cast(CAR_IMAGE), result_data); -# Serial.print(result_data[0]); Serial.print(", "); -# Serial.print(result_data[1]); Serial.println(); -# -# Serial.println("Other object results:"); -# TVMExecute(const_cast(CATAN_IMAGE), result_data); -# Serial.print(result_data[0]); Serial.print(", "); -# Serial.print(result_data[1]); Serial.println(); -# -# delay(1000); -# } + +# #include "src/model.h" +# #include "car.c" +# #include "catan.c" +# +# void setup() { +# Serial.begin(9600); +# TVMInitialize(); +# } +# +# void loop() { +# uint8_t result_data[2]; +# Serial.println("Car results:"); +# TVMExecute(const_cast(CAR_IMAGE), result_data); +# Serial.print(result_data[0]); Serial.print(", "); +# Serial.print(result_data[1]); Serial.println(); +# +# Serial.println("Other object results:"); +# TVMExecute(const_cast(CATAN_IMAGE), result_data); +# Serial.print(result_data[0]); Serial.print(", "); +# Serial.print(result_data[1]); Serial.println(); +# +# delay(1000); +# } # # Compiling our Code # ^^^^^^^^^^^^^^^^^^ @@ -504,7 +549,6 @@ def representative_dataset(): # not throw any compiler errors: arduino_project.build() -arduino_project.upload() print("Compilation succeeded!") ###################################################################### @@ -513,6 +557,7 @@ def representative_dataset(): # If all works as expected, you should see the following output on a Serial monitor: # # .. code-block:: +# # Car results: # 255, 0 # Other object results: From 6dd8f77e449b138d2508d29b065e06558f3c2b95 Mon Sep 17 00:00:00 2001 From: Gavin Uberti Date: Sat, 9 Apr 2022 02:42:49 -0400 Subject: [PATCH 03/14] Fix obvious formatting issues --- .../how_to/work_with_microtvm/micro_train.py | 97 ++++++++++--------- 1 file changed, 50 insertions(+), 47 deletions(-) diff --git a/gallery/how_to/work_with_microtvm/micro_train.py b/gallery/how_to/work_with_microtvm/micro_train.py index 0f6a46b54baa..e6606875b091 100644 --- a/gallery/how_to/work_with_microtvm/micro_train.py +++ b/gallery/how_to/work_with_microtvm/micro_train.py @@ -41,7 +41,7 @@ # * A traffic light might look for **cars**, so it can judge which lights should change first. # * A forest camera might want to look for a **kind of animal**, so they can estimate how large the animal population is. # To make these devices affordable, we would like them to need only a low-cost processor like the -# `nRF52840` (costing $5 each on Mouser) or the `RP2040` (just $1.45 each!). +# `nRF52840 `_ (costing $5 each on Mouser) or the `RP2040 `_ (just $1.45 each!). # # These devices have very little memory (~250 KB RAM), meaning that no conventional edge AI # vision model (like MobileNet or EfficientNet) will be able to run. In this tutorial, we will @@ -50,6 +50,7 @@ # # Installing the Prerequisites # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# # To run this tutorial, we will need Tensorflow and TFLite to train our model, pyserial and tlcpack # (a community build of TVM) to compile and test it, and imagemagick and curl to preprocess data. # We will also need to install the Arduino CLI and the mbed_nano package to test our model. @@ -67,10 +68,11 @@ # # Using the GPU # ^^^^^^^^^^^^^ +# # This tutorial demonstrates training a neural network, which is requires a lot of computing power # and will go much faster if you have a GPU. 
If you are viewing this tutorial on Google Colab, you # can enable a GPU by going to **Runtime->Change runtime type** and selecting "GPU" as our hardware -# accelerator. If you are running locally, you can `follow Tensorflow's guide` instead. +# accelerator. If you are running locally, you can `follow Tensorflow's guide `_ instead. # # We can test our GPU installation with the following code: @@ -85,7 +87,7 @@ # Choosing Our Work Dir # ^^^^^^^^^^^^^^^^^^^^^ # We need to pick a directory where our image datasets, trained model, and eventual Arduino sketch -# will all live. If running on Google Colab, we'll save everything in `/root` (aka `~`) but you'll +# will all live. If running on Google Colab, we'll save everything in ``/root`` (aka ``~``) but you'll # probably want to store it elsewhere if running locally. FOLDER = "/root" @@ -105,9 +107,9 @@ # whatever category you like! Just change the source URL below to one containing images of another # type of object. # -# To get our car images, we'll be downloading the `Stanford Cars dataset` , +# To get our car images, we'll be downloading the `Stanford Cars dataset `_, # which contains 16,185 full color images of cars. We'll also need images of random things that -# aren't cars, so we'll use the `COCO 2014` validation set (it's +# aren't cars, so we'll use the `COCO 2017 ` validation set (it's # smaller, and thus faster to download than the full training set. Training on the full data set # would yield better results). Note that there are some cars in the COCO 2017 data set, but it's # a small enough fraction not to matter - just keep in mind that this will drive down our percieved @@ -166,22 +168,22 @@ # ---------------- # Currently, our data is stored on-disk as JPG files of various sizes. To train with it, we'll have # to load the images into memory, resize them to be 64x64, and convert them to raw, uncompressed -# data. Keras's `image_dataset_from_directory` will take care of most of this, though it loads +# data. Keras's ``image_dataset_from_directory`` will take care of most of this, though it loads # images such that each pixel value is a float from 0 to 255. # # We'll also need to load labels, though Keras will help with this. From our subdirectory structure, -# it knows the images in `/objects` are one class, and those in `/random` another. Setting -# `label_mode='categorical'` tells Keras to convert these into **categorical labels** - a 2x1 vector -# that's either `[1, 0]` for an object of our target class, or `[0, 1]` vector for anything else. -# We'll also set `shuffle=True` to randomize the order of our examples. +# it knows the images in ``/objects`` are one class, and those in ``/random`` another. Setting +# ``label_mode='categorical'`` tells Keras to convert these into **categorical labels** - a 2x1 vector +# that's either ``[1, 0]`` for an object of our target class, or ``[0, 1]`` vector for anything else. +# We'll also set ``shuffle=True`` to randomize the order of our examples. # # We will also **batch** the data - grouping samples into clumps to make our training go faster. -# Setting `batch_size = 32` is a decent number. +# Setting ``batch_size = 32`` is a decent number. # # Lastly, in machine learning we generally want our inputs to be small numbers. We'll thus use a -# `Rescaling` layer to change our images such that each pixel is a float between `0.0` and `1.0`, -# instead of `0` to `255`. We need to be careful not to rescale our categorical labels though, so -# we'll use a `lambda` function. 
+# ``Rescaling`` layer to change our images such that each pixel is a float between ``0.0`` and ``1.0``, +# instead of ``0`` to ``255``. We need to be careful not to rescale our categorical labels though, so +# we'll use a ``lambda`` function. import tensorflow as tf @@ -200,7 +202,7 @@ # ^^^^^^^^^^^^^^^^^^^^^^^^^^ # Before giving this data set to our neural network, we ought to give it a quick visual inspection. # Does the data look properly transformed? Do the labels seem appropriate? And what's our ratio of -# objects to other stuff? We can display some examples from our datasets using `matplotlib`: +# objects to other stuff? We can display some examples from our datasets using ``matplotlib``: import matplotlib.pyplot as plt from os import listdir @@ -237,8 +239,8 @@ ###################################################################### # Loading the Data # ---------------- -# In the past decade, `convolutional neural networks` have been widely -# adopted for image classification tasks. State-of-the-art models like `EfficientNet V2` are able +# In the past decade, `convolutional neural networks `_ have been widely +# adopted for image classification tasks. State-of-the-art models like `EfficientNet V2 `_ are able # to perform image classification better than even humans! Unfortunately, these models have tens of # millions of parameters, and thus won't fit on cheap security camera computers. # @@ -257,13 +259,13 @@ # # What is Transfer Learning? # ^^^^^^^^^^^^^^^^^^^^^^^^^^ -# Deep learning has `dominated image classification` for a long time, +# Deep learning has `dominated image classification `_ for a long time, # but training neural networks takes a lot of time. When a neural network is trained "from scratch", # its parameters start out randomly initialized, forcing it to learn very slowly how to tell images # apart. # # With transfer learning, we instead start with a neural network that's **already** good at a -# specific task. In this example, that task is classifying images from `the ImageNet database`. This +# specific task. In this example, that task is classifying images from `the ImageNet database `_. This # means the network already has some object detection capabilities, and is likely closer to what you # want then a random model would be. # @@ -278,7 +280,7 @@ # conglomerate model for a few epochs on our cars vs non-cars dataset, to fine tune the first layers # and train from scratch the last layers. # -# Source MobileNets for transfer learning have been `pretrained by the Tensorflow folks`, so we +# Source MobileNets for transfer learning have been `pretrained by the Tensorflow folks `_, so we # can just download the one closest to what we want (the 128x128 input model with 0.25 depth scale). # # .. code-block:: bash @@ -325,13 +327,13 @@ # When training neural networks, we must set a parameter called the **learning rate** that controls # how fast our network learns. It must be set carefully - too slow, and our network will take # forever to train; too fast, and our network won't be able to learn some fine details. Generally -# for Adam (the optimizer we're using), `0.001` is a pretty good learning rate (and is what's -# recommended in the `original paper`). However, in this case -# `0.0005` seems to work a little better. +# for Adam (the optimizer we're using), ``0.001`` is a pretty good learning rate (and is what's +# recommended in the `original paper `_). However, in this case +# ``0.0005`` seems to work a little better. 
# -# We'll also pass the validation set from earlier to `model.fit`. This will evaluate how good our +# We'll also pass the validation set from earlier to ``model.fit``. This will evaluate how good our # model is each time we train it, and let us track how our model is improving. Once training is -# finished, the model should have a validation accuracy around `0.98` (meaning it was right 98% of +# finished, the model should have a validation accuracy around ``0.98`` (meaning it was right 98% of # the time on our validation set). model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), @@ -344,7 +346,7 @@ # ------------ # We've done a decent job of reducing our model's size so far - changing the input dimension, # along with removing the bottom layers reduced the model to just 219k parameters. However, each of -# these parameters is a `float32` that takes four bytes, so our model will take up almost one MB! +# these parameters is a ``float32`` that takes four bytes, so our model will take up almost one MB! # # Additionally, it might be the case that our hardware doesn't have built-in support for floating # point numbers. While most high-memory Arduinos (like the Nano 33 BLE) do have hardware support, @@ -357,10 +359,10 @@ # while being relatively truthful to the original model. # # We will help TensorFlow do this by creating a representative dataset - a subset of the original -# that is used for tracking how those neurons activate. We'll then pass this into a `TFLiteConverter` -# (Keras itself does not have quantization support) with an `Optimize` flag to tell TFLite to perform +# that is used for tracking how those neurons activate. We'll then pass this into a ``TFLiteConverter`` +# (Keras itself does not have quantization support) with an ``Optimize`` flag to tell TFLite to perform # the conversion. By default, TFLite keeps the inputs and outputs of our model as floats, so we must -# explicitly tell it to avoid this behavior. +# explicitly tell it to avoid this behavior. converter = tf.lite.TFLiteConverter.from_keras_model(model) def representative_dataset(): @@ -379,8 +381,8 @@ def representative_dataset(): # Download the Model if Desired # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # We've now got a finished model, that you can use locally or in other tutorials (try autotuning -# this model or viewing it on `https://netron.app/`. If you're running this -# tutorial on Google Colab, then the code below will let you download your `.tflite` model. +# this model or viewing it on `https://netron.app/ `_. If you're running this +# tutorial on Google Colab, then the code below will let you download your ``.tflite`` model. # # .. code-block:: python # @@ -394,16 +396,16 @@ def representative_dataset(): ###################################################################### # Compiling With TVM For Arduino # ------------------------------ -# Tensorflow has a built-in framework for deploying to microcontrollers - `TFLite Micro`. However, +# Tensorflow has a built-in framework for deploying to microcontrollers - `TFLite Micro `_. However, # it's poorly supported by development boards, and does not support autotuning. We will use Apache # TVM instead. # -# TVM can be used either with its command line interface (`tvmc`) or with its Python interface. The +# TVM can be used either with its command line interface (``tvmc``) or with its Python interface. The # Python interface is fully-featured and more stable, so we'll use it here. 
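#
# If you ever want to try the ``tvmc`` command line flow instead, note that it works on model
# files rather than in-memory objects, so you would first write the quantized flatbuffer to
# disk. A minimal sketch (the file name is just an example) is shown below; we will stick with
# the Python API for the rest of this tutorial:
#
# .. code-block:: python
#
#     TFLITE_PATH = FOLDER + "/models/quantized.tflite"
#     with open(TFLITE_PATH, "wb") as f:
#         f.write(quantized_model)  # bytes returned by converter.convert() above
#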
# # TVM is an optimizing compiler, and optimizations to our model are performed in stages via -# **intermediate representations**. The first of these is `Relay` a high-level intermediate -# representation emphasizing portability. The conversion from `.tflite` to Relay is done without any +# **intermediate representations**. The first of these is `Relay `_ a high-level intermediate +# representation emphasizing portability. The conversion from ``.tflite`` to Relay is done without any # knowledge of our "end goal" - the fact we intend to run this model on an Arduino. # # Choosing an Arduino Board @@ -424,14 +426,15 @@ def representative_dataset(): # ^^^^^^^^^^^^^^^^^^^^^^ # Next, we'll compile the model to TVM's MLF (machine learning format) intermediate representation, # which consists of C/C++ code and is designed for autotuning. To improve performance, we'll tell -# TVM that we're compiling for the `nrf52840` microprocessor (the one the Nano 33 BLE uses). We'll -# also tell it to use the C runtime (abbreviated `crt`) and to use ahead-of-time memory allocation -# (abbreviated `aot`, which helps reduce the model's memory footprint). Lastly, we will disable -# vectorization with `"tir.disable_vectorize": True`, as C has no native vectorized types. +# TVM that we're compiling for the ``nrf52840`` microprocessor (the one the Nano 33 BLE uses). We'll +# also tell it to use the C runtime (abbreviated ``crt``) and to use ahead-of-time memory allocation +# (abbreviated ``aot``, which helps reduce the model's memory footprint). Lastly, we will disable +# vectorization with ``"tir.disable_vectorize": True``, as C has no native vectorized types. # -# Once we have set these configuration parameters, we will call `tvm.relay.build` to compile our +# Once we have set these configuration parameters, we will call ``tvm.relay.build`` to compile our # Relay model into the MLF intermediate representation. From here, we just need to call -# `tvm.micro.generate_project` and pass in the Arduino template project to finish compilation. +# ``tvm.micro.generate_project`` and pass in the Arduino template project to finish compilation. + import shutil import tflite import tvm @@ -478,11 +481,11 @@ def representative_dataset(): # on them both. # # Currently, these are 224x224 PNG images we can download from Imgur. Before we can feed in these -# images, we'll need to resize and convert them to raw data, which can be done with `imagemagick`. +# images, we'll need to resize and convert them to raw data, which can be done with ``imagemagick``. # # It's also challenging to load raw data onto an Arduino, as only C/CPP files (and similar) are # compiled. We can work around this by embedding our raw data in a hard-coded C array with the -# built-in utility `bin2c`, that will output a file resembling the following: +# built-in utility ``bin2c``, that will output a file resembling the following: # # .. code-block:: c # @@ -511,7 +514,7 @@ def representative_dataset(): # Writing our Arduino Script # -------------------------- # We now need a little bit of Arduino code to read the two binary arrays we just generated, run the -# model on them, and log the output to the serial monitor. This file will replace `arduino_sketch.ino` +# model on them, and log the output to the serial monitor. This file will replace ``arduino_sketch.ino`` # as the main file of our sketch. You'll have to copy this code in manually. # # .. 
code-block:: bash @@ -543,7 +546,7 @@ def representative_dataset(): # Compiling our Code # ^^^^^^^^^^^^^^^^^^ # Now that our project has been generated, TVM's job is mostly done! We can still call -# `arduino_project.build()` and `arduino_project.upload()`, but these just use `arduino-cli`'s +# ``arduino_project.build()`` and ``arduino_project.upload()``, but these just use ``arduino-cli``'s # compile and flash commands underneath. We could also begin autotuning our model, but that's a # subject for a different tutorial. To finish up, we'll first test that our program compiles does # not throw any compiler errors: @@ -579,5 +582,5 @@ def representative_dataset(): # Next Steps # ^^^^^^^^^^ # From here, we could modify the model to read live images from the camera - we have another -# Arduino tutorial for how to do that `on GitHub`. Alternatively, we could also -# `use TVM's autotuning capabilities` to dramatically improve the model's performance. +# Arduino tutorial for how to do that `on GitHub `_. Alternatively, we could also +# `use TVM's autotuning capabilities `_ to dramatically improve the model's performance. From eb66af9c0e9aa2ecbeba014c28723a4c06eaaecb Mon Sep 17 00:00:00 2001 From: Gavin Uberti Date: Sat, 9 Apr 2022 04:11:02 -0400 Subject: [PATCH 04/14] Linting --- docs/conf.py | 1 + .../how_to/work_with_microtvm/micro_train.py | 81 ++++++++++--------- 2 files changed, 43 insertions(+), 39 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 49c5c4fa755d..60f0017cc154 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -312,6 +312,7 @@ def git_describe_version(original_version): "bring_your_own_datatypes.py", ], "micro": [ + "micro_train.py", "micro_autotune.py", "micro_reference_vm.py", "micro_tflite.py", diff --git a/gallery/how_to/work_with_microtvm/micro_train.py b/gallery/how_to/work_with_microtvm/micro_train.py index e6606875b091..5dd0c0a05683 100644 --- a/gallery/how_to/work_with_microtvm/micro_train.py +++ b/gallery/how_to/work_with_microtvm/micro_train.py @@ -77,11 +77,12 @@ # We can test our GPU installation with the following code: import tensorflow as tf + if not tf.test.gpu_device_name(): - print("No GPU was detected!") - print("Model training will take much longer (~30 minutes instead of ~5)") + print("No GPU was detected!") + print("Model training will take much longer (~30 minutes instead of ~5)") else: - print("GPU detected - you're good to go.") + print("GPU detected - you're good to go.") ###################################################################### # Choosing Our Work Dir @@ -91,8 +92,9 @@ # probably want to store it elsewhere if running locally. FOLDER = "/root" -#.. testsetup:: +# .. testsetup:: import tempfile + FOLDER = tempfile.mkdtemp() ###################################################################### @@ -151,17 +153,17 @@ # curl "http://images.cocodataset.org/zips/val2017.zip" -o {FOLDER}/images/random.zip # unzip -jqo {FOLDER}/images/random.zip -d {FOLDER}/images/random - -#.. 
doctest:: import os + os.mkdir(FOLDER + "/images") os.mkdir(FOLDER + "/images/object") os.mkdir(FOLDER + "/images/random") from PIL import Image + for category in ["object", "random"]: - for i in range(48): - img = Image.new("RGB", (100, 100), (255, 255, 255)) - img.save(FOLDER + f"/images/{category}/{i:05d}.jpg", "JPEG") + for i in range(48): + img = Image.new("RGB", (100, 100), (255, 255, 255)) + img.save(FOLDER + f"/images/{category}/{i:05d}.jpg", "JPEG") ###################################################################### # Loading the Data @@ -188,13 +190,13 @@ import tensorflow as tf unscaled_dataset = tf.keras.utils.image_dataset_from_directory( - FOLDER + "/images", - batch_size=32, - shuffle=True, - label_mode='categorical', - image_size=(96, 96), + FOLDER + "/images", + batch_size=32, + shuffle=True, + label_mode="categorical", + image_size=(96, 96), ) -rescale = tf.keras.layers.Rescaling(scale=1.0/255) +rescale = tf.keras.layers.Rescaling(scale=1.0 / 255) full_dataset = unscaled_dataset.map(lambda im, lbl: (rescale(im), lbl)) ###################################################################### @@ -213,12 +215,12 @@ SAMPLES_TO_SHOW = 10 plt.figure(figsize=(20, 10)) for i, (image, label) in enumerate(unscaled_dataset.unbatch()): - if i >= SAMPLES_TO_SHOW: - break - ax = plt.subplot(1, SAMPLES_TO_SHOW, i + 1) - plt.imshow(image.numpy().astype("uint8")) - plt.title(list(label.numpy())) - plt.axis("off") + if i >= SAMPLES_TO_SHOW: + break + ax = plt.subplot(1, SAMPLES_TO_SHOW, i + 1) + plt.imshow(image.numpy().astype("uint8")) + plt.title(list(label.numpy())) + plt.axis("off") ###################################################################### # What's Inside Our Dataset? @@ -291,13 +293,11 @@ IMAGE_SIZE = (96, 96, 3) WEIGHTS_PATH = FOLDER + "/models/mobilenet_2_5_128_tf.h5" -#.. doctest:: +# .. doctest:: os.mkdir(FOLDER + "/models") WEIGHTS_PATH = None pretrained = tf.keras.applications.MobileNet( - input_shape = IMAGE_SIZE, - weights = WEIGHTS_PATH, - alpha = 0.25 + input_shape=IMAGE_SIZE, weights=WEIGHTS_PATH, alpha=0.25 ) ###################################################################### @@ -311,15 +311,12 @@ model = tf.keras.models.Sequential() model.add(tf.keras.layers.InputLayer(input_shape=IMAGE_SIZE)) -model.add(tf.keras.Model( - inputs=pretrained.inputs, - outputs=pretrained.layers[-5].output -)) +model.add(tf.keras.Model(inputs=pretrained.inputs, outputs=pretrained.layers[-5].output)) model.add(tf.keras.layers.Reshape((-1,))) model.add(tf.keras.layers.Dropout(0.1)) model.add(tf.keras.layers.Flatten()) -model.add(tf.keras.layers.Dense(2, activation='softmax')) +model.add(tf.keras.layers.Dense(2, activation="softmax")) ###################################################################### # Training Our Network @@ -336,9 +333,11 @@ # finished, the model should have a validation accuracy around ``0.98`` (meaning it was right 98% of # the time on our validation set). -model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), - loss='categorical_crossentropy', - metrics=['accuracy']) +model.compile( + optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), + loss="categorical_crossentropy", + metrics=["accuracy"], +) model.fit(train_dataset, validation_data=validation_dataset, epochs=3, verbose=2) ###################################################################### @@ -365,9 +364,12 @@ # explicitly tell it to avoid this behavior. 
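#
# Once the conversion below has produced ``quantized_model``, it is worth double-checking that
# the inputs and outputs really are eight-bit integers. A minimal sketch of such a check, to be
# run after the conversion step that follows (the exact dtype depends on the converter settings):
#
# .. code-block:: python
#
#     interpreter = tf.lite.Interpreter(model_content=quantized_model)
#     print(interpreter.get_input_details()[0]["dtype"])   # expect int8 (or uint8)
#     print(interpreter.get_output_details()[0]["dtype"])  # expect int8 (or uint8)
#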
converter = tf.lite.TFLiteConverter.from_keras_model(model) + + def representative_dataset(): - for image_batch, label_batch in full_dataset.take(3): - yield [image_batch] + for image_batch, label_batch in full_dataset.take(3): + yield [image_batch] + converter.optimizations = [tf.lite.Optimize.DEFAULT] converter.representative_dataset = representative_dataset @@ -440,9 +442,9 @@ def representative_dataset(): import tvm # Method to load model is different in TFLite 1 vs 2 -try: # TFLite 2.1 and above +try: # TFLite 2.1 and above tflite_model = tflite.Model.GetRootAsModel(quantized_model, 0) -except AttributeError: # Fall back to TFLite 1.14 method +except AttributeError: # Fall back to TFLite 1.14 method tflite_model = tflite.Model.Model.GetRootAsModel(quantized_model, 0) # Convert to the Relay intermediate representation @@ -459,6 +461,7 @@ def representative_dataset(): # Generate an Arduino project from the MLF intermediate representation from unittest.mock import create_autospec, MagicMock + tvm.micro.generate_project = create_autospec(tvm.micro.generate_project, return_value=MagicMock()) shutil.rmtree(FOLDER + "/models/project", ignore_errors=True) @@ -517,8 +520,8 @@ def representative_dataset(): # model on them, and log the output to the serial monitor. This file will replace ``arduino_sketch.ino`` # as the main file of our sketch. You'll have to copy this code in manually. # -# .. code-block:: bash - +# .. code-block:: c +# # #include "src/model.h" # #include "car.c" # #include "catan.c" From d248f9a0c27268d6932f949731cce357058539b0 Mon Sep 17 00:00:00 2001 From: Gavin Uberti Date: Fri, 15 Apr 2022 02:34:41 -0400 Subject: [PATCH 05/14] Proof of concept showing that "Open in Colab" is possible --- docs/conf.py | 1 - .../how_to/work_with_microtvm/micro_train.py | 193 +++++++++++++----- tests/scripts/ci.py | 2 +- 3 files changed, 139 insertions(+), 57 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 60f0017cc154..7758e4fde5cc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -361,7 +361,6 @@ def force_gc(gallery_conf, fname): "gallery_dirs": gallery_dirs, "subsection_order": subsection_order, "filename_pattern": os.environ.get("TVM_TUTORIAL_EXEC_PATTERN", ".py"), - "find_mayavi_figures": False, "download_all_examples": False, "min_reported_time": 60, "expected_failing_examples": [], diff --git a/gallery/how_to/work_with_microtvm/micro_train.py b/gallery/how_to/work_with_microtvm/micro_train.py index 5dd0c0a05683..416d4625c86b 100644 --- a/gallery/how_to/work_with_microtvm/micro_train.py +++ b/gallery/how_to/work_with_microtvm/micro_train.py @@ -17,8 +17,8 @@ """ .. _microtvm-train-arduino: -Training Vision Models for microTVM -=================================== +Training Vision Models for microTVM on Arduino +============================================== **Author**: `Gavin Uberti `_ This tutorial shows how MobileNetV1 models can be trained @@ -31,17 +31,25 @@ # # This tutorial is best viewed as a Jupyter Notebook. You can download and run it locally # using the link at the bottom of this page, or open it online for free using Google Colab. +# Click the icon below to open in Google Colab. +# +# .. 
image:: https://upload.wikimedia.org/wikipedia/commons/thumb/d/d0/Google_Colaboratory_SVG_Logo.svg/800px-Google_Colaboratory_SVG_Logo.svg.png +# :align: center +# :target: https://colab.research.google.com/github/guberti/tvm-site/blob/asf-site/docs/_downloads/a7c7ea4b5017ae70db1f51dd8e6dcd82/micro_train.ipynb # # Motivation # ---------- # When building IOT devices, we often want them to **see and understand** the world around them. # This can take many forms, but often times a device will want to know if a certain **kind of -# object** is in its field of vision. For example: -# * A security camera might look for **people**, so it can decide whether to save a video to memory. -# * A traffic light might look for **cars**, so it can judge which lights should change first. -# * A forest camera might want to look for a **kind of animal**, so they can estimate how large the animal population is. +# object** is in its field of vision. +# +# For example, a security camera might look for **people**, so it can decide whether to save a video +# to memory. A traffic light might look for **cars**, so it can judge which lights should change +# first. Or a forest camera might look for a **kind of animal**, so they can estimate how large +# the animal population is. +# # To make these devices affordable, we would like them to need only a low-cost processor like the -# `nRF52840 `_ (costing $5 each on Mouser) or the `RP2040 `_ (just $1.45 each!). +# `nRF52840 `_ (costing five dollars each on Mouser) or the `RP2040 `_ (just $1.45 each!). # # These devices have very little memory (~250 KB RAM), meaning that no conventional edge AI # vision model (like MobileNet or EfficientNet) will be able to run. In this tutorial, we will @@ -57,6 +65,7 @@ # # .. code-block:: bash # +# %%bash # pip install -q tensorflow tflite pyserial # pip install -q tlcpack-nightly -f https://tlcpack.ai/wheels # apt-get -qq install imagemagick curl @@ -91,12 +100,40 @@ # will all live. If running on Google Colab, we'll save everything in ``/root`` (aka ``~``) but you'll # probably want to store it elsewhere if running locally. +import os FOLDER = "/root" -# .. testsetup:: +os.environ["FOLDER"] = FOLDER +# sphinx_gallery_start_ignore +# Training a model takes a lot of disc space and CPU time, and I don't want to +# slow down the build of the docs. To solve this problem, we'll mock the +# problematic methods, but still run most of the code so it serves as a test. +# Note that this mocking code will not show up on the webpage or Colab notebook. import tempfile +import unittest +# Disable Tensorflow's complaining about misconfigured GPU +tf.get_logger().setLevel('INFO') + +# Do our work in a tempfile instead of the Colab root directory FOLDER = tempfile.mkdtemp() +# Don't mess with environment variables for bash +del os.environ["FOLDER"] + +# Rather than download our image files, we will just make blank ones. 
+os.mkdir(FOLDER + "/images") +os.mkdir(FOLDER + "/images/object") +os.mkdir(FOLDER + "/images/random") +from PIL import Image +for category, color in [("object", 0), ("random", 255)]: + for i in range(48): + img = Image.new("RGB", (100, 100), (color, color, color)) + img.save(FOLDER + f"/images/{category}/{i:05d}.jpg", "JPEG") + +# Make our models directory where the .tflite file will be saved +os.mkdir(FOLDER + "/models") +# sphinx_gallery_end_ignore + ###################################################################### # Downloading the Data # -------------------- @@ -111,7 +148,7 @@ # # To get our car images, we'll be downloading the `Stanford Cars dataset `_, # which contains 16,185 full color images of cars. We'll also need images of random things that -# aren't cars, so we'll use the `COCO 2017 ` validation set (it's +# aren't cars, so we'll use the `COCO 2017 `_ validation set (it's # smaller, and thus faster to download than the full training set. Training on the full data set # would yield better results). Note that there are some cars in the COCO 2017 data set, but it's # a small enough fraction not to matter - just keep in mind that this will drive down our percieved @@ -143,29 +180,17 @@ # # .. code-block:: bash # +# %%bash # # Download and extract our car images -# mkdir -p {FOLDER}/images/object/ -# curl "http://ai.stanford.edu/~jkrause/car196/car_ims.tgz" -o {FOLDER}/images/object.tgz -# tar -xf {FOLDER}/images/object.tgz --strip-components 1 -C {FOLDER}/images/object# +# mkdir -p $FOLDER/images/object/ +# curl "http://ai.stanford.edu/~jkrause/car196/car_ims.tgz" -o $FOLDER/images/object.tgz +# tar -xf $FOLDER/images/object.tgz --strip-components 1 -C $FOLDER/images/object # # # Download and extract other images -# mkdir -p {FOLDER}/images/random/ -# curl "http://images.cocodataset.org/zips/val2017.zip" -o {FOLDER}/images/random.zip -# unzip -jqo {FOLDER}/images/random.zip -d {FOLDER}/images/random - -import os - -os.mkdir(FOLDER + "/images") -os.mkdir(FOLDER + "/images/object") -os.mkdir(FOLDER + "/images/random") -from PIL import Image - -for category in ["object", "random"]: - for i in range(48): - img = Image.new("RGB", (100, 100), (255, 255, 255)) - img.save(FOLDER + f"/images/{category}/{i:05d}.jpg", "JPEG") - -###################################################################### +# mkdir -p $FOLDER/images/random/ +# curl "http://images.cocodataset.org/zips/val2017.zip" -o $FOLDER/images/random.zip +# unzip -jqo $FOLDER/images/random.zip -d $FOLDER/images/random +# # Loading the Data # ---------------- # Currently, our data is stored on-disk as JPG files of various sizes. To train with it, we'll have @@ -194,7 +219,7 @@ batch_size=32, shuffle=True, label_mode="categorical", - image_size=(96, 96), + image_size=(64, 64), ) rescale = tf.keras.layers.Rescaling(scale=1.0 / 255) full_dataset = unscaled_dataset.map(lambda im, lbl: (rescale(im), lbl)) @@ -287,15 +312,18 @@ # # .. code-block:: bash # -# mkdir -p {FOLDER}/models +# %%bash +# mkdir -p $FOLDER/models # curl "https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_2_5_128_tf.h5" \ -# -o {FOLDER}/models/mobilenet_2_5_128_tf.h5 +# -o $FOLDER/models/mobilenet_2_5_128_tf.h5 +# -IMAGE_SIZE = (96, 96, 3) +IMAGE_SIZE = (64, 64, 3) WEIGHTS_PATH = FOLDER + "/models/mobilenet_2_5_128_tf.h5" -# .. 
doctest:: -os.mkdir(FOLDER + "/models") +# sphinx_gallery_start_ignore +# Use random weights instead of ones from the file we did not download WEIGHTS_PATH = None +# sphinx_gallery_end_ignore pretrained = tf.keras.applications.MobileNet( input_shape=IMAGE_SIZE, weights=WEIGHTS_PATH, alpha=0.25 ) @@ -338,6 +366,10 @@ loss="categorical_crossentropy", metrics=["accuracy"], ) +# sphinx_gallery_start_ignore +# Skip training to save time +model.fit = unittest.mock.create_autospec(model.fit) +# sphinx_gallery_end_ignore model.fit(train_dataset, validation_data=validation_dataset, epochs=3, verbose=2) ###################################################################### @@ -365,12 +397,10 @@ converter = tf.lite.TFLiteConverter.from_keras_model(model) - def representative_dataset(): for image_batch, label_batch in full_dataset.take(3): yield [image_batch] - converter.optimizations = [tf.lite.Optimize.DEFAULT] converter.representative_dataset = representative_dataset converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] @@ -383,17 +413,16 @@ def representative_dataset(): # Download the Model if Desired # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # We've now got a finished model, that you can use locally or in other tutorials (try autotuning -# this model or viewing it on `https://netron.app/ `_. If you're running this -# tutorial on Google Colab, then the code below will let you download your ``.tflite`` model. -# -# .. code-block:: python -# -# from google.colab import files -# QUANTIZED_MODEL_PATH = '/root/models/quantized.tflite' -# with open(QUANTIZED_MODEL_PATH, 'wb') as f: -# f.write(quantized_model) -# files.download(QUANTIZED_MODEL_PATH) -# +# this model or viewing it on `https://netron.app/ `_). But before we do +# those things, we'll have to write it to a file (``quantized.tflite``). If you're running this +# tutorial on Google Colab, you'll have to uncomment the last two lines to download the file +# after writing it. 
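#
# After the file below has been written, it can also be useful to confirm how much flash the
# model will need. A minimal sketch of that check (the exact size depends on how quantization
# went, but it should land in the few-hundred-kilobyte range):
#
# .. code-block:: python
#
#     import os
#     size_kb = os.path.getsize(FOLDER + "/models/quantized.tflite") / 1024
#     print(f"Quantized model is {size_kb:.1f} KB")
#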
+ +QUANTIZED_MODEL_PATH = FOLDER + "/models/quantized.tflite" +with open(QUANTIZED_MODEL_PATH, 'wb') as f: + f.write(quantized_model) +#from google.colab import files +#files.download(QUANTIZED_MODEL_PATH) ###################################################################### # Compiling With TVM For Arduino @@ -458,12 +487,15 @@ def representative_dataset(): # Convert to the MLF intermediate representation with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): mod = tvm.relay.build(mod, target, runtime=runtime, executor=executor, params=params) +# sphinx_gallery_start_ignore +# Mock the generate project function so we can skip installing arduino-cli +tvm.micro.generate_project = unittest.mock.create_autospec( + tvm.micro.generate_project, + return_value=unittest.mock.MagicMock() +) +# sphinx_gallery_end_ignore # Generate an Arduino project from the MLF intermediate representation -from unittest.mock import create_autospec, MagicMock - -tvm.micro.generate_project = create_autospec(tvm.micro.generate_project, return_value=MagicMock()) - shutil.rmtree(FOLDER + "/models/project", ignore_errors=True) arduino_project = tvm.micro.generate_project( tvm.micro.get_microtvm_template_projects("arduino"), @@ -475,6 +507,9 @@ def representative_dataset(): "project_type": "example_project", }, ) +# sphinx_gallery_start_ignore +os.mkdir(FOLDER + "/models/project") +# sphinx_gallery_end_ignore ###################################################################### # Testing our Arduino Project @@ -483,6 +518,11 @@ def representative_dataset(): # We will test our Arduino project by loading both of these images, and executing the compiled model # on them both. # +# .. image:: https://i.imgur.com/mLkmxBm.png +# :align: center +# :height: 200px +# :width: 600px +# # Currently, these are 224x224 PNG images we can download from Imgur. Before we can feed in these # images, we'll need to resize and convert them to raw data, which can be done with ``imagemagick``. # @@ -502,6 +542,7 @@ def representative_dataset(): # # .. code-block:: bash # +# %%bash # mkdir -p /root/tests # curl "https://i.imgur.com/JBbEhxN.png" -o ~/tests/car_224.png # convert ~/tests/car_224.png -resize 64 ~/tests/car_64.png @@ -518,10 +559,11 @@ def representative_dataset(): # -------------------------- # We now need a little bit of Arduino code to read the two binary arrays we just generated, run the # model on them, and log the output to the serial monitor. This file will replace ``arduino_sketch.ino`` -# as the main file of our sketch. You'll have to copy this code in manually. +# as the main file of our sketch. You'll have to copy this code in manually.. # # .. code-block:: c # +# %%writefile /root/models/project.ino # #include "src/model.h" # #include "car.c" # #include "catan.c" @@ -546,7 +588,7 @@ def representative_dataset(): # delay(1000); # } # -# Compiling our Code +# Compiling Our Code # ^^^^^^^^^^^^^^^^^^ # Now that our project has been generated, TVM's job is mostly done! We can still call # ``arduino_project.build()`` and ``arduino_project.upload()``, but these just use ``arduino-cli``'s @@ -554,12 +596,52 @@ def representative_dataset(): # subject for a different tutorial. 
To finish up, we'll first test that our program compiles does # not throw any compiler errors: +shutil.rmtree(FOLDER + "/models/project/build", ignore_errors=True) arduino_project.build() print("Compilation succeeded!") ###################################################################### +# Uploading to Our Device +# ----------------------- +# The very last step is uploading our sketch to an Arduino to make sure our code works properly. +# Unfortunately, we can't do that from Google Colab, so we'll have to download our sketch. This is +# simple enough to do - we'll just turn our project into a `.zip` archive, and call `files.download`. +# If you're running on Google Colab, you'll have to uncomment the last two lines to download the file +# after writing it. + +ZIP_FOLDER = FOLDER + "/models/project" +shutil.make_archive(ZIP_FOLDER, 'zip', ZIP_FOLDER) +#from google.colab import files +#files.download("/root/models/project.zip") +# sphinx_gallery_start_ignore +# Run a few unit tests to make sure the Python code worked + +# Ensure transfer learn model was correctly assembled +assert len(model.layers) == 5 +assert model.count_params() == 219058 # Only 219,058 of these are trainable + +assert len(quantized_model) >= 250000 # Quantized model will be 250 KB - 350 KB +assert len(quantized_model) <= 350000 # Exact value depends on quantization + +# Assert .tflite and .zip files were written to disk +assert os.path.isfile(FOLDER + "/models/quantized.tflite") +assert os.path.isfile(FOLDER + "/models/project.zip") + +# Assert MLF file was correctly generated +assert str(mod.executor) == "aot" + +# Remove the temporary folder we generated at the beginning +shutil.rmtree(FOLDER) +# sphinx_gallery_end_ignore + + +###################################################################### +# From here, we'll need to open it in the Arduino IDE. You'll have to download the IDE as well as +# the SDK for whichever board you are using. For certain boards like the Sony SPRESENSE, you may +# have to change settings to control how much memory you want the board to use. +# # Expected Results -# ^^^^^^^^^^^^^^^^^^ +# ^^^^^^^^^^^^^^^^ # If all works as expected, you should see the following output on a Serial monitor: # # .. code-block:: @@ -587,3 +669,4 @@ def representative_dataset(): # From here, we could modify the model to read live images from the camera - we have another # Arduino tutorial for how to do that `on GitHub `_. Alternatively, we could also # `use TVM's autotuning capabilities `_ to dramatically improve the model's performance. 
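#
# If you do flash the sketch to a physical board, the serial output shown under "Expected
# Results" above can also be read from Python with the ``pyserial`` package (``pip install
# pyserial`` if it is not already present). The snippet below is an illustrative sketch only -
# the port name, and possibly the baud rate, are assumptions you will need to adjust for your
# own setup:
#
# .. code-block:: python
#
#     import serial  # provided by pyserial
#
#     # "/dev/ttyACM0" is a common port name on Linux; on Windows it may be e.g. "COM3"
#     with serial.Serial("/dev/ttyACM0", baudrate=9600, timeout=10) as port:
#         for _ in range(6):
#             print(port.readline().decode().strip())
#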
+# diff --git a/tests/scripts/ci.py b/tests/scripts/ci.py index b3f9cb6500e5..606c285ff883 100755 --- a/tests/scripts/ci.py +++ b/tests/scripts/ci.py @@ -260,7 +260,7 @@ def docs( "tlcpack-sphinx-addon==0.2.1", "synr==0.5.0", "image==1.5.33", - "sphinx-gallery==0.4.0", + "git+https://github.com/guberti/sphinx-gallery.git", "sphinx-rtd-theme==1.0.0", "matplotlib==3.3.4", "commonmark==0.9.1", From 586d79a47e28a470199f7fe538c5a77c758b3428 Mon Sep 17 00:00:00 2001 From: Gavin Uberti Date: Sun, 24 Apr 2022 00:25:49 -0400 Subject: [PATCH 06/14] Make test Python script more readable --- docs/conf.py | 1 + .../how_to/work_with_microtvm/micro_train.py | 135 +++++++----------- tests/scripts/ci.py | 2 +- 3 files changed, 50 insertions(+), 88 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 7758e4fde5cc..9d55e20c03e5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -365,6 +365,7 @@ def force_gc(gallery_conf, fname): "min_reported_time": 60, "expected_failing_examples": [], "reset_modules": ("matplotlib", "seaborn", force_gc), + "promote_jupyter_magic": True, } autodoc_default_options = { diff --git a/gallery/how_to/work_with_microtvm/micro_train.py b/gallery/how_to/work_with_microtvm/micro_train.py index 416d4625c86b..d9001b5a76a1 100644 --- a/gallery/how_to/work_with_microtvm/micro_train.py +++ b/gallery/how_to/work_with_microtvm/micro_train.py @@ -102,36 +102,9 @@ import os FOLDER = "/root" -os.environ["FOLDER"] = FOLDER # sphinx_gallery_start_ignore -# Training a model takes a lot of disc space and CPU time, and I don't want to -# slow down the build of the docs. To solve this problem, we'll mock the -# problematic methods, but still run most of the code so it serves as a test. -# Note that this mocking code will not show up on the webpage or Colab notebook. import tempfile -import unittest - -# Disable Tensorflow's complaining about misconfigured GPU -tf.get_logger().setLevel('INFO') - -# Do our work in a tempfile instead of the Colab root directory FOLDER = tempfile.mkdtemp() - -# Don't mess with environment variables for bash -del os.environ["FOLDER"] - -# Rather than download our image files, we will just make blank ones. -os.mkdir(FOLDER + "/images") -os.mkdir(FOLDER + "/images/object") -os.mkdir(FOLDER + "/images/random") -from PIL import Image -for category, color in [("object", 0), ("random", 255)]: - for i in range(48): - img = Image.new("RGB", (100, 100), (color, color, color)) - img.save(FOLDER + f"/images/{category}/{i:05d}.jpg", "JPEG") - -# Make our models directory where the .tflite file will be saved -os.mkdir(FOLDER + "/models") # sphinx_gallery_end_ignore ###################################################################### @@ -172,25 +145,33 @@ # │ │ └── 000000581781.jpg # │ └── random.zip # -# We should also note that Stanford cars has 16k images, while the COCO 2017 validation set is 5k +# We should also note that Stanford cars has 8k images, while the COCO 2017 validation set is 5k # images - it is not a 50/50 split! If we wanted to, we could weight these classes differently # during training to correct for this, but training will still work if we ignore it. It should # take about **2 minutes** to download the Stanford Cars, while COCO 2017 validation will take # **1 minute**. -# -# .. 
code-block:: bash -# -# %%bash -# # Download and extract our car images -# mkdir -p $FOLDER/images/object/ -# curl "http://ai.stanford.edu/~jkrause/car196/car_ims.tgz" -o $FOLDER/images/object.tgz -# tar -xf $FOLDER/images/object.tgz --strip-components 1 -C $FOLDER/images/object -# -# # Download and extract other images -# mkdir -p $FOLDER/images/random/ -# curl "http://images.cocodataset.org/zips/val2017.zip" -o $FOLDER/images/random.zip -# unzip -jqo $FOLDER/images/random.zip -d $FOLDER/images/random -# + +import os +import shutil +import urllib.request + +# Download datasets +os.makedirs(f"{FOLDER}/images") +urllib.request.urlretrieve( + "http://ai.stanford.edu/~jkrause/car196/cars_train.tgz", + f"{FOLDER}/images/target.tgz" +) +urllib.request.urlretrieve( + "http://images.cocodataset.org/zips/val2017.zip", + f"{FOLDER}/images/random.zip" +) + +# Extract them and rename their folders +shutil.unpack_archive(f"{FOLDER}/images/target.tgz", f"{FOLDER}/images") +shutil.unpack_archive(f"{FOLDER}/images/random.zip", f"{FOLDER}/images") +shutil.move(f"{FOLDER}/images/cars_train", f"{FOLDER}/images/target") +shutil.move(f"{FOLDER}/images/val2017", f"{FOLDER}/images/random") + # Loading the Data # ---------------- # Currently, our data is stored on-disk as JPG files of various sizes. To train with it, we'll have @@ -212,14 +193,13 @@ # instead of ``0`` to ``255``. We need to be careful not to rescale our categorical labels though, so # we'll use a ``lambda`` function. -import tensorflow as tf - +IMAGE_SIZE = (64, 64, 3) unscaled_dataset = tf.keras.utils.image_dataset_from_directory( - FOLDER + "/images", + f"{FOLDER}/images", batch_size=32, shuffle=True, label_mode="categorical", - image_size=(64, 64), + image_size=IMAGE_SIZE[0:2], ) rescale = tf.keras.layers.Rescaling(scale=1.0 / 255) full_dataset = unscaled_dataset.map(lambda im, lbl: (rescale(im), lbl)) @@ -232,11 +212,13 @@ # objects to other stuff? We can display some examples from our datasets using ``matplotlib``: import matplotlib.pyplot as plt -from os import listdir -print("/images/random contains %d images" % len(listdir(FOLDER + "/images/random/"))) -print("/images/target contains %d images" % len(listdir(FOLDER + "/images/object/"))) +num_target_class = len(os.listdir(f"{FOLDER}/images/target/")) +num_random_class = len(os.listdir(f"{FOLDER}/images/random/")) +print(f"{FOLDER}/images/target contains {num_target_class} images") +print(f"{FOLDER}/images/random contains {num_random_class} images") +# Show some samples and their labels SAMPLES_TO_SHOW = 10 plt.figure(figsize=(20, 10)) for i, (image, label) in enumerate(unscaled_dataset.unbatch()): @@ -309,21 +291,14 @@ # # Source MobileNets for transfer learning have been `pretrained by the Tensorflow folks `_, so we # can just download the one closest to what we want (the 128x128 input model with 0.25 depth scale). -# -# .. 
code-block:: bash -# -# %%bash -# mkdir -p $FOLDER/models -# curl "https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_2_5_128_tf.h5" \ -# -o $FOLDER/models/mobilenet_2_5_128_tf.h5 -# -IMAGE_SIZE = (64, 64, 3) -WEIGHTS_PATH = FOLDER + "/models/mobilenet_2_5_128_tf.h5" -# sphinx_gallery_start_ignore -# Use random weights instead of ones from the file we did not download -WEIGHTS_PATH = None -# sphinx_gallery_end_ignore +os.makedirs(f"{FOLDER}/models") +WEIGHTS_PATH = f"{FOLDER}/models/mobilenet_2_5_128_tf.h5" +urllib.request.urlretrieve( + "https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_2_5_128_tf.h5", + WEIGHTS_PATH +) + pretrained = tf.keras.applications.MobileNet( input_shape=IMAGE_SIZE, weights=WEIGHTS_PATH, alpha=0.25 ) @@ -366,11 +341,7 @@ loss="categorical_crossentropy", metrics=["accuracy"], ) -# sphinx_gallery_start_ignore -# Skip training to save time -model.fit = unittest.mock.create_autospec(model.fit) -# sphinx_gallery_end_ignore -model.fit(train_dataset, validation_data=validation_dataset, epochs=3, verbose=2) +#model.fit(train_dataset, validation_data=validation_dataset, epochs=3, verbose=2) ###################################################################### # Quantization @@ -398,7 +369,7 @@ converter = tf.lite.TFLiteConverter.from_keras_model(model) def representative_dataset(): - for image_batch, label_batch in full_dataset.take(3): + for image_batch, label_batch in full_dataset.take(10): yield [image_batch] converter.optimizations = [tf.lite.Optimize.DEFAULT] @@ -418,7 +389,7 @@ def representative_dataset(): # tutorial on Google Colab, you'll have to uncomment the last two lines to download the file # after writing it. -QUANTIZED_MODEL_PATH = FOLDER + "/models/quantized.tflite" +QUANTIZED_MODEL_PATH = f"{FOLDER}/models/quantized.tflite" with open(QUANTIZED_MODEL_PATH, 'wb') as f: f.write(quantized_model) #from google.colab import files @@ -487,29 +458,19 @@ def representative_dataset(): # Convert to the MLF intermediate representation with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): mod = tvm.relay.build(mod, target, runtime=runtime, executor=executor, params=params) -# sphinx_gallery_start_ignore -# Mock the generate project function so we can skip installing arduino-cli -tvm.micro.generate_project = unittest.mock.create_autospec( - tvm.micro.generate_project, - return_value=unittest.mock.MagicMock() -) -# sphinx_gallery_end_ignore # Generate an Arduino project from the MLF intermediate representation -shutil.rmtree(FOLDER + "/models/project", ignore_errors=True) +shutil.rmtree(f"{FOLDER}/models/project", ignore_errors=True) arduino_project = tvm.micro.generate_project( tvm.micro.get_microtvm_template_projects("arduino"), mod, - FOLDER + "/models/project", + f"{FOLDER}/models/project", { "arduino_board": "nano33ble", "arduino_cli_cmd": "/content/bin/arduino-cli", "project_type": "example_project", }, ) -# sphinx_gallery_start_ignore -os.mkdir(FOLDER + "/models/project") -# sphinx_gallery_end_ignore ###################################################################### # Testing our Arduino Project @@ -596,7 +557,7 @@ def representative_dataset(): # subject for a different tutorial. 
To finish up, we'll first test that our program compiles does # not throw any compiler errors: -shutil.rmtree(FOLDER + "/models/project/build", ignore_errors=True) +shutil.rmtree("{FOLDER}/models/project/build", ignore_errors=True) arduino_project.build() print("Compilation succeeded!") @@ -609,10 +570,10 @@ def representative_dataset(): # If you're running on Google Colab, you'll have to uncomment the last two lines to download the file # after writing it. -ZIP_FOLDER = FOLDER + "/models/project" +ZIP_FOLDER = "{FOLDER}/models/project" shutil.make_archive(ZIP_FOLDER, 'zip', ZIP_FOLDER) #from google.colab import files -#files.download("/root/models/project.zip") +#files.download(f"{FOLDER}/models/project.zip") # sphinx_gallery_start_ignore # Run a few unit tests to make sure the Python code worked @@ -624,8 +585,8 @@ def representative_dataset(): assert len(quantized_model) <= 350000 # Exact value depends on quantization # Assert .tflite and .zip files were written to disk -assert os.path.isfile(FOLDER + "/models/quantized.tflite") -assert os.path.isfile(FOLDER + "/models/project.zip") +assert os.path.isfile("{FOLDER}/models/quantized.tflite") +assert os.path.isfile("{FOLDER}/models/project.zip") # Assert MLF file was correctly generated assert str(mod.executor) == "aot" diff --git a/tests/scripts/ci.py b/tests/scripts/ci.py index 606c285ff883..d6a3d457fe65 100755 --- a/tests/scripts/ci.py +++ b/tests/scripts/ci.py @@ -260,7 +260,7 @@ def docs( "tlcpack-sphinx-addon==0.2.1", "synr==0.5.0", "image==1.5.33", - "git+https://github.com/guberti/sphinx-gallery.git", + "git+https://github.com/guberti/sphinx-gallery.git@ipynb-include-bash", "sphinx-rtd-theme==1.0.0", "matplotlib==3.3.4", "commonmark==0.9.1", From 8c56eb3f981186c90866ff9158d88ce4037941be Mon Sep 17 00:00:00 2001 From: Gavin Uberti Date: Wed, 27 Apr 2022 18:48:30 -0400 Subject: [PATCH 07/14] Fix formatting --- .../how_to/work_with_microtvm/micro_train.py | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/gallery/how_to/work_with_microtvm/micro_train.py b/gallery/how_to/work_with_microtvm/micro_train.py index d9001b5a76a1..d1ee33f32b37 100644 --- a/gallery/how_to/work_with_microtvm/micro_train.py +++ b/gallery/how_to/work_with_microtvm/micro_train.py @@ -33,7 +33,7 @@ # using the link at the bottom of this page, or open it online for free using Google Colab. # Click the icon below to open in Google Colab. # -# .. image:: https://upload.wikimedia.org/wikipedia/commons/thumb/d/d0/Google_Colaboratory_SVG_Logo.svg/800px-Google_Colaboratory_SVG_Logo.svg.png +# .. image:: https://raw.githubusercontent.com/guberti/web-data/micro-train-tutorial-data/images/utilities/colab_button.png # :align: center # :target: https://colab.research.google.com/github/guberti/tvm-site/blob/asf-site/docs/_downloads/a7c7ea4b5017ae70db1f51dd8e6dcd82/micro_train.ipynb # @@ -101,9 +101,11 @@ # probably want to store it elsewhere if running locally. 
import os + FOLDER = "/root" # sphinx_gallery_start_ignore import tempfile + FOLDER = tempfile.mkdtemp() # sphinx_gallery_end_ignore @@ -158,12 +160,10 @@ # Download datasets os.makedirs(f"{FOLDER}/images") urllib.request.urlretrieve( - "http://ai.stanford.edu/~jkrause/car196/cars_train.tgz", - f"{FOLDER}/images/target.tgz" + "http://ai.stanford.edu/~jkrause/car196/cars_train.tgz", f"{FOLDER}/images/target.tgz" ) urllib.request.urlretrieve( - "http://images.cocodataset.org/zips/val2017.zip", - f"{FOLDER}/images/random.zip" + "http://images.cocodataset.org/zips/val2017.zip", f"{FOLDER}/images/random.zip" ) # Extract them and rename their folders @@ -296,7 +296,7 @@ WEIGHTS_PATH = f"{FOLDER}/models/mobilenet_2_5_128_tf.h5" urllib.request.urlretrieve( "https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_2_5_128_tf.h5", - WEIGHTS_PATH + WEIGHTS_PATH, ) pretrained = tf.keras.applications.MobileNet( @@ -341,7 +341,7 @@ loss="categorical_crossentropy", metrics=["accuracy"], ) -#model.fit(train_dataset, validation_data=validation_dataset, epochs=3, verbose=2) +# model.fit(train_dataset, validation_data=validation_dataset, epochs=3, verbose=2) ###################################################################### # Quantization @@ -368,10 +368,12 @@ converter = tf.lite.TFLiteConverter.from_keras_model(model) + def representative_dataset(): for image_batch, label_batch in full_dataset.take(10): yield [image_batch] + converter.optimizations = [tf.lite.Optimize.DEFAULT] converter.representative_dataset = representative_dataset converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] @@ -390,10 +392,10 @@ def representative_dataset(): # after writing it. QUANTIZED_MODEL_PATH = f"{FOLDER}/models/quantized.tflite" -with open(QUANTIZED_MODEL_PATH, 'wb') as f: - f.write(quantized_model) -#from google.colab import files -#files.download(QUANTIZED_MODEL_PATH) +with open(QUANTIZED_MODEL_PATH, "wb") as f: + f.write(quantized_model) +# from google.colab import files +# files.download(QUANTIZED_MODEL_PATH) ###################################################################### # Compiling With TVM For Arduino @@ -479,7 +481,7 @@ def representative_dataset(): # We will test our Arduino project by loading both of these images, and executing the compiled model # on them both. # -# .. image:: https://i.imgur.com/mLkmxBm.png +# .. image:: https://raw.githubusercontent.com/guberti/web-data/micro-train-tutorial-data/testdata/microTVM/data/model_train_images_combined.png # :align: center # :height: 200px # :width: 600px @@ -571,18 +573,18 @@ def representative_dataset(): # after writing it. 
ZIP_FOLDER = "{FOLDER}/models/project" -shutil.make_archive(ZIP_FOLDER, 'zip', ZIP_FOLDER) -#from google.colab import files -#files.download(f"{FOLDER}/models/project.zip") +shutil.make_archive(ZIP_FOLDER, "zip", ZIP_FOLDER) +# from google.colab import files +# files.download(f"{FOLDER}/models/project.zip") # sphinx_gallery_start_ignore # Run a few unit tests to make sure the Python code worked # Ensure transfer learn model was correctly assembled assert len(model.layers) == 5 -assert model.count_params() == 219058 # Only 219,058 of these are trainable +assert model.count_params() == 219058 # Only 219,058 of these are trainable -assert len(quantized_model) >= 250000 # Quantized model will be 250 KB - 350 KB -assert len(quantized_model) <= 350000 # Exact value depends on quantization +assert len(quantized_model) >= 250000 # Quantized model will be 250 KB - 350 KB +assert len(quantized_model) <= 350000 # Exact value depends on quantization # Assert .tflite and .zip files were written to disk assert os.path.isfile("{FOLDER}/models/quantized.tflite") From 42355f04cc8b0c2a43b5e21d11814741f79d692b Mon Sep 17 00:00:00 2001 From: Gavin Uberti Date: Mon, 2 May 2022 16:45:09 -0400 Subject: [PATCH 08/14] Ready for review --- .../how_to/work_with_microtvm/micro_train.py | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/gallery/how_to/work_with_microtvm/micro_train.py b/gallery/how_to/work_with_microtvm/micro_train.py index d1ee33f32b37..83d7d3c87083 100644 --- a/gallery/how_to/work_with_microtvm/micro_train.py +++ b/gallery/how_to/work_with_microtvm/micro_train.py @@ -36,6 +36,7 @@ # .. image:: https://raw.githubusercontent.com/guberti/web-data/micro-train-tutorial-data/images/utilities/colab_button.png # :align: center # :target: https://colab.research.google.com/github/guberti/tvm-site/blob/asf-site/docs/_downloads/a7c7ea4b5017ae70db1f51dd8e6dcd82/micro_train.ipynb +# :width: 600px # # Motivation # ---------- @@ -98,7 +99,8 @@ # ^^^^^^^^^^^^^^^^^^^^^ # We need to pick a directory where our image datasets, trained model, and eventual Arduino sketch # will all live. If running on Google Colab, we'll save everything in ``/root`` (aka ``~``) but you'll -# probably want to store it elsewhere if running locally. +# probably want to store it elsewhere if running locally. Note that this variable only affects Python +# scripts - you'll have to adjust the Bash commands too. import os @@ -172,6 +174,7 @@ shutil.move(f"{FOLDER}/images/cars_train", f"{FOLDER}/images/target") shutil.move(f"{FOLDER}/images/val2017", f"{FOLDER}/images/random") +###################################################################### # Loading the Data # ---------------- # Currently, our data is stored on-disk as JPG files of various sizes. To train with it, we'll have @@ -230,8 +233,8 @@ plt.axis("off") ###################################################################### -# What's Inside Our Dataset? -# ^^^^^^^^^^^^^^^^^^^^^^^^^^ +# Validating our Accuracy +# ^^^^^^^^^^^^^^^^^^^^^^^ # While developing our model, we'll often want to check how accurate it is (e.g. to see if it # improves during training). How do we do this? We could just train it on *all* of the data, and # then ask it to classify that same data. 
However, our model could cheat by just memorizing all of @@ -341,7 +344,7 @@ loss="categorical_crossentropy", metrics=["accuracy"], ) -# model.fit(train_dataset, validation_data=validation_dataset, epochs=3, verbose=2) +model.fit(train_dataset, validation_data=validation_dataset, epochs=3, verbose=2) ###################################################################### # Quantization @@ -506,7 +509,7 @@ def representative_dataset(): # .. code-block:: bash # # %%bash -# mkdir -p /root/tests +# mkdir -p ~/tests # curl "https://i.imgur.com/JBbEhxN.png" -o ~/tests/car_224.png # convert ~/tests/car_224.png -resize 64 ~/tests/car_64.png # stream ~/tests/car_64.png ~/tests/car.raw @@ -559,7 +562,7 @@ def representative_dataset(): # subject for a different tutorial. To finish up, we'll first test that our program compiles does # not throw any compiler errors: -shutil.rmtree("{FOLDER}/models/project/build", ignore_errors=True) +shutil.rmtree(f"{FOLDER}/models/project/build", ignore_errors=True) arduino_project.build() print("Compilation succeeded!") @@ -572,7 +575,7 @@ def representative_dataset(): # If you're running on Google Colab, you'll have to uncomment the last two lines to download the file # after writing it. -ZIP_FOLDER = "{FOLDER}/models/project" +ZIP_FOLDER = f"{FOLDER}/models/project" shutil.make_archive(ZIP_FOLDER, "zip", ZIP_FOLDER) # from google.colab import files # files.download(f"{FOLDER}/models/project.zip") @@ -587,8 +590,8 @@ def representative_dataset(): assert len(quantized_model) <= 350000 # Exact value depends on quantization # Assert .tflite and .zip files were written to disk -assert os.path.isfile("{FOLDER}/models/quantized.tflite") -assert os.path.isfile("{FOLDER}/models/project.zip") +assert os.path.isfile(f"{FOLDER}/models/quantized.tflite") +assert os.path.isfile(f"{FOLDER}/models/project.zip") # Assert MLF file was correctly generated assert str(mod.executor) == "aot" From 95676f26ed53b81a9bad9f097d3e8a2ba38090fb Mon Sep 17 00:00:00 2001 From: Gavin Uberti Date: Mon, 2 May 2022 16:51:59 -0400 Subject: [PATCH 09/14] Import pyserial only when needed Changes from code review Use official sphinx-gallery repo Correctly specify version Import pyserial only when necessary --- .../template_project/microtvm_api_server.py | 4 +++- apps/microtvm/pyproject.toml | 2 +- docker/install/ubuntu_install_sphinx.sh | 2 +- .../how_to/work_with_microtvm/micro_train.py | 24 ++++++++++--------- tests/scripts/ci.py | 2 +- 5 files changed, 19 insertions(+), 15 deletions(-) diff --git a/apps/microtvm/arduino/template_project/microtvm_api_server.py b/apps/microtvm/arduino/template_project/microtvm_api_server.py index 95f941fe3473..131f92a20829 100644 --- a/apps/microtvm/arduino/template_project/microtvm_api_server.py +++ b/apps/microtvm/arduino/template_project/microtvm_api_server.py @@ -34,7 +34,6 @@ import re from packaging import version -import serial.tools.list_ports from tvm.micro.project_api import server @@ -485,6 +484,9 @@ def flash(self, options): subprocess.run(upload_cmd, check=True) def open_transport(self, options): + import serial + import serial.tools.list_ports + # Zephyr example doesn't throw an error in this case if self._serial is not None: return diff --git a/apps/microtvm/pyproject.toml b/apps/microtvm/pyproject.toml index 98c769be48f5..59fdefc40ab3 100644 --- a/apps/microtvm/pyproject.toml +++ b/apps/microtvm/pyproject.toml @@ -129,7 +129,7 @@ importer-tflite = ["tflite", "tensorflow", "tensorflow-estimator"] autodocsumm = "^0.1" black = "^19.10b0" sphinx = "^3.0" 
-sphinx-gallery = "^0.8" +sphinx-gallery = { git = "https://github.com/sphinx-gallery/sphinx-gallery.git", branch = "master" } sphinx-rtd-theme = "^0.4" matplotlib = "^3.2" Image = "^1.5" diff --git a/docker/install/ubuntu_install_sphinx.sh b/docker/install/ubuntu_install_sphinx.sh index 12ca25b22b85..8092c3d1ea5a 100755 --- a/docker/install/ubuntu_install_sphinx.sh +++ b/docker/install/ubuntu_install_sphinx.sh @@ -29,5 +29,5 @@ pip3 install \ matplotlib \ sphinx==4.2.0 \ sphinx_autodoc_annotation \ - sphinx-gallery==0.4.0 \ + "git+https://github.com/sphinx-gallery/sphinx-gallery.git" \ sphinx_rtd_theme diff --git a/gallery/how_to/work_with_microtvm/micro_train.py b/gallery/how_to/work_with_microtvm/micro_train.py index 83d7d3c87083..d0696190f510 100644 --- a/gallery/how_to/work_with_microtvm/micro_train.py +++ b/gallery/how_to/work_with_microtvm/micro_train.py @@ -36,7 +36,7 @@ # .. image:: https://raw.githubusercontent.com/guberti/web-data/micro-train-tutorial-data/images/utilities/colab_button.png # :align: center # :target: https://colab.research.google.com/github/guberti/tvm-site/blob/asf-site/docs/_downloads/a7c7ea4b5017ae70db1f51dd8e6dcd82/micro_train.ipynb -# :width: 600px +# :width: 300px # # Motivation # ---------- @@ -258,16 +258,17 @@ # # Our applications generally don't need perfect accuracy - 90% is good enough. We can thus use the # older and smaller MobileNet V1 architecture. But this *still* won't be small enough - by default, -# MobileNet V1 with 224x224 inputs and depth 1.0 takes ~50 MB to just **store**. To reduce the size +# MobileNet V1 with 224x224 inputs and alpha 1.0 takes ~50 MB to just **store**. To reduce the size # of the model, there are three knobs we can turn. First, we can reduce the size of the input images -# from 224x224 to 96x96 or 64x64, and Keras makes it easy to do this. We can also reduce the **depth** -# of the model, from 1.0 to 0.25. And if we were really strapped for space, we could reduce the +# from 224x224 to 96x96 or 64x64, and Keras makes it easy to do this. We can also reduce the **alpha** +# of the model, from 1.0 to 0.25, which downscales the width of the network (and the number of +# filters) by a factor of four. And if we were really strapped for space, we could reduce the # number of **channels** by making our model take grayscale images instead of RGB ones. # -# In this tutorial, we will use an RGB 64x64 input image and 0.25 depth scale. This is not quite +# In this tutorial, we will use an RGB 64x64 input image and alpha 0.25. This is not quite # ideal, but it allows the finished model to fit in 192 KB of RAM, while still letting us perform -# transfer learning using the official Tensorflow source models (if we used depth scale <0.25 or -# a grayscale input, we wouldn't be able to do this). +# transfer learning using the official Tensorflow source models (if we used alpha <0.25 or a +# grayscale input, we wouldn't be able to do this). # # What is Transfer Learning? # ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -369,14 +370,12 @@ # the conversion. By default, TFLite keeps the inputs and outputs of our model as floats, so we must # explicitly tell it to avoid this behavior. 
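#
# To put rough numbers on the savings we expect from quantization: the fine-tuned network has
# about 219k parameters, so a back-of-the-envelope estimate (ignoring metadata and activations)
# looks like the following sketch:
#
# .. code-block:: python
#
#     params = 219_058              # value reported by model.count_params()
#     print(params * 4 / 1024)      # ~856 KB if every weight is a four-byte float32
#     print(params * 1 / 1024)      # ~214 KB if every weight is a one-byte int8
#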
-converter = tf.lite.TFLiteConverter.from_keras_model(model) - - def representative_dataset(): for image_batch, label_batch in full_dataset.take(10): yield [image_batch] +converter = tf.lite.TFLiteConverter.from_keras_model(model) converter.optimizations = [tf.lite.Optimize.DEFAULT] converter.representative_dataset = representative_dataset converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] @@ -431,7 +430,7 @@ def representative_dataset(): # # Generating our project # ^^^^^^^^^^^^^^^^^^^^^^ -# Next, we'll compile the model to TVM's MLF (machine learning format) intermediate representation, +# Next, we'll compile the model to TVM's MLF (model library format) intermediate representation, # which consists of C/C++ code and is designed for autotuning. To improve performance, we'll tell # TVM that we're compiling for the ``nrf52840`` microprocessor (the one the Nano 33 BLE uses). We'll # also tell it to use the C runtime (abbreviated ``crt``) and to use ahead-of-time memory allocation @@ -563,6 +562,9 @@ def representative_dataset(): # not throw any compiler errors: shutil.rmtree(f"{FOLDER}/models/project/build", ignore_errors=True) +# sphinx_gallery_start_ignore +arduino_project = MagicMock() +# sphinx_gallery_end_ignore arduino_project.build() print("Compilation succeeded!") diff --git a/tests/scripts/ci.py b/tests/scripts/ci.py index d6a3d457fe65..ed3bf401c8ad 100755 --- a/tests/scripts/ci.py +++ b/tests/scripts/ci.py @@ -260,7 +260,7 @@ def docs( "tlcpack-sphinx-addon==0.2.1", "synr==0.5.0", "image==1.5.33", - "git+https://github.com/guberti/sphinx-gallery.git@ipynb-include-bash", + "git+https://github.com/sphinx-gallery/sphinx-gallery.git", "sphinx-rtd-theme==1.0.0", "matplotlib==3.3.4", "commonmark==0.9.1", From bde134afdd19ed7f3e122a8db482bfc67329c33c Mon Sep 17 00:00:00 2001 From: Gavin Uberti Date: Sat, 21 May 2022 14:37:18 -0400 Subject: [PATCH 10/14] Add warning to ignored list Try to avoid throwing warning Fix linting, try verbosity filter Try adding to ignore file Remove fix attempts --- gallery/how_to/work_with_microtvm/micro_train.py | 3 +++ tests/scripts/task_python_docs.sh | 1 + 2 files changed, 4 insertions(+) diff --git a/gallery/how_to/work_with_microtvm/micro_train.py b/gallery/how_to/work_with_microtvm/micro_train.py index d0696190f510..8cac1eaa8383 100644 --- a/gallery/how_to/work_with_microtvm/micro_train.py +++ b/gallery/how_to/work_with_microtvm/micro_train.py @@ -370,6 +370,7 @@ # the conversion. By default, TFLite keeps the inputs and outputs of our model as floats, so we must # explicitly tell it to avoid this behavior. + def representative_dataset(): for image_batch, label_batch in full_dataset.take(10): yield [image_batch] @@ -563,6 +564,8 @@ def representative_dataset(): shutil.rmtree(f"{FOLDER}/models/project/build", ignore_errors=True) # sphinx_gallery_start_ignore +from unittest.mock import MagicMock + arduino_project = MagicMock() # sphinx_gallery_end_ignore arduino_project.build() diff --git a/tests/scripts/task_python_docs.sh b/tests/scripts/task_python_docs.sh index b4b52ed36ccf..72ef8a54c200 100755 --- a/tests/scripts/task_python_docs.sh +++ b/tests/scripts/task_python_docs.sh @@ -84,6 +84,7 @@ IGNORED_WARNINGS=( 'autotvm:Cannot find config for target=llvm -keys=cpu -link-params=0' 'autotvm:One or more operators have not been tuned. Please tune your model for better performance. Use DEBUG logging level to see more details.' 
'autotvm:Cannot find config for target=cuda -keys=cuda,gpu' + 'absl:For model inputs containing unsupported operations' ) JOINED_WARNINGS=$(join_by '|' "${IGNORED_WARNINGS[@]}") From b994a0f7b93c060b237b52265561fc300f99ca5c Mon Sep 17 00:00:00 2001 From: Gavin Uberti Date: Thu, 26 May 2022 00:36:26 -0400 Subject: [PATCH 11/14] Grammar fixes --- .../how_to/work_with_microtvm/micro_train.py | 56 ++++++++++--------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/gallery/how_to/work_with_microtvm/micro_train.py b/gallery/how_to/work_with_microtvm/micro_train.py index 8cac1eaa8383..378fe56d9da0 100644 --- a/gallery/how_to/work_with_microtvm/micro_train.py +++ b/gallery/how_to/work_with_microtvm/micro_train.py @@ -60,14 +60,19 @@ # Installing the Prerequisites # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # -# To run this tutorial, we will need Tensorflow and TFLite to train our model, pyserial and tlcpack -# (a community build of TVM) to compile and test it, and imagemagick and curl to preprocess data. -# We will also need to install the Arduino CLI and the mbed_nano package to test our model. +# This tutorial will use TensorFlow to train the model - a widely used machine learning library +# created by Google. TensorFlow is a very low-level library, however, so we will the Keras +# interface to talk to TensorFlow. We will also use TensorFlow Lite to perform quantization on +# our model, as TensorFlow by itself does not support this. +# +# Once we have our generated model, we will use TVM to compile and test it. To avoid having to +# build from source, we'll install ``tlcpack`` - a community build of TVM. Lastly, we'll also +# install ``imagemagick`` and ``curl`` to preprocess data: # # .. code-block:: bash # # %%bash -# pip install -q tensorflow tflite pyserial +# pip install -q tensorflow tflite # pip install -q tlcpack-nightly -f https://tlcpack.ai/wheels # apt-get -qq install imagemagick curl # @@ -82,7 +87,7 @@ # This tutorial demonstrates training a neural network, which is requires a lot of computing power # and will go much faster if you have a GPU. If you are viewing this tutorial on Google Colab, you # can enable a GPU by going to **Runtime->Change runtime type** and selecting "GPU" as our hardware -# accelerator. If you are running locally, you can `follow Tensorflow's guide `_ instead. +# accelerator. If you are running locally, you can `follow TensorFlow's guide `_ instead. # # We can test our GPU installation with the following code: @@ -131,7 +136,7 @@ # a small enough fraction not to matter - just keep in mind that this will drive down our percieved # accuracy slightly. # -# We could use the Tensorflow dataloader utilities, but we'll instead do it manually to make sure +# We could use the TensorFlow dataloader utilities, but we'll instead do it manually to make sure # it's easy to change the datasets being used. We'll end up with the following file hierarchy: # # .. code-block:: @@ -267,7 +272,7 @@ # # In this tutorial, we will use an RGB 64x64 input image and alpha 0.25. This is not quite # ideal, but it allows the finished model to fit in 192 KB of RAM, while still letting us perform -# transfer learning using the official Tensorflow source models (if we used alpha <0.25 or a +# transfer learning using the official TensorFlow source models (if we used alpha <0.25 or a # grayscale input, we wouldn't be able to do this). # # What is Transfer Learning? 
@@ -290,10 +295,11 @@ # We can take advantage of this by starting training with a MobileNet model that was trained on # ImageNet, and already knows how to identify those lines and shapes. We can then just remove the # last few layers from this pretrained model, and add our own final layers. We'll then train this -# conglomerate model for a few epochs on our cars vs non-cars dataset, to fine tune the first layers -# and train from scratch the last layers. +# conglomerate model for a few epochs on our cars vs non-cars dataset, to adjust the first layers +# and train from scratch the last layers. This process of training an already-partially-trained +# model is called *fine-tuning*. # -# Source MobileNets for transfer learning have been `pretrained by the Tensorflow folks `_, so we +# Source MobileNets for transfer learning have been `pretrained by the TensorFlow folks `_, so we # can just download the one closest to what we want (the 128x128 input model with 0.25 depth scale). os.makedirs(f"{FOLDER}/models") @@ -326,8 +332,8 @@ model.add(tf.keras.layers.Dense(2, activation="softmax")) ###################################################################### -# Training Our Network -# ^^^^^^^^^^^^^^^^^^^^ +# Fine Tuning Our Network +# ^^^^^^^^^^^^^^^^^^^^^^^ # When training neural networks, we must set a parameter called the **learning rate** that controls # how fast our network learns. It must be set carefully - too slow, and our network will take # forever to train; too fast, and our network won't be able to learn some fine details. Generally @@ -361,8 +367,8 @@ # # To address both issues we will **quantize** the model - representing the weights as eight bit # integers. It's more complex than just rounding, though - to get the best performance, TensorFlow -# tracks how each neuron in our model activates, so we can figure out how to best represent the -# while being relatively truthful to the original model. +# tracks how each neuron in our model activates, so we can figure out how most accurately simulate +# the neuron's original activations with integer operations. # # We will help TensorFlow do this by creating a representative dataset - a subset of the original # that is used for tracking how those neurons activate. We'll then pass this into a ``TFLiteConverter`` @@ -388,7 +394,7 @@ def representative_dataset(): ###################################################################### # Download the Model if Desired # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# We've now got a finished model, that you can use locally or in other tutorials (try autotuning +# We've now got a finished model that you can use locally or in other tutorials (try autotuning # this model or viewing it on `https://netron.app/ `_). But before we do # those things, we'll have to write it to a file (``quantized.tflite``). If you're running this # tutorial on Google Colab, you'll have to uncomment the last two lines to download the file @@ -403,8 +409,8 @@ def representative_dataset(): ###################################################################### # Compiling With TVM For Arduino # ------------------------------ -# Tensorflow has a built-in framework for deploying to microcontrollers - `TFLite Micro `_. However, -# it's poorly supported by development boards, and does not support autotuning. We will use Apache +# TensorFlow has a built-in framework for deploying to microcontrollers - `TFLite Micro `_. However, +# it's poorly supported by development boards and does not support autotuning. We will use Apache # TVM instead. 
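#
# As a quick check that the TVM build we installed ships with microTVM's Arduino support, we can
# ask it (through the Python interface described next) for the path of the Arduino template
# project. This is only a sketch - the printed path will differ between installations:
#
# .. code-block:: python
#
#     import tvm.micro
#     print(tvm.micro.get_microtvm_template_projects("arduino"))
#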
# # TVM can be used either with its command line interface (``tvmc``) or with its Python interface. The @@ -481,8 +487,8 @@ def representative_dataset(): # Testing our Arduino Project # --------------------------- # Consider the following two 224x224 images from the author's camera roll - one of a car, one not. -# We will test our Arduino project by loading both of these images, and executing the compiled model -# on them both. +# We will test our Arduino project by loading both of these images and executing the compiled model +# on them. # # .. image:: https://raw.githubusercontent.com/guberti/web-data/micro-train-tutorial-data/testdata/microTVM/data/model_train_images_combined.png # :align: center @@ -494,7 +500,7 @@ def representative_dataset(): # # It's also challenging to load raw data onto an Arduino, as only C/CPP files (and similar) are # compiled. We can work around this by embedding our raw data in a hard-coded C array with the -# built-in utility ``bin2c``, that will output a file resembling the following: +# built-in utility ``bin2c`` that will output a file like below: # # .. code-block:: c # @@ -559,8 +565,8 @@ def representative_dataset(): # Now that our project has been generated, TVM's job is mostly done! We can still call # ``arduino_project.build()`` and ``arduino_project.upload()``, but these just use ``arduino-cli``'s # compile and flash commands underneath. We could also begin autotuning our model, but that's a -# subject for a different tutorial. To finish up, we'll first test that our program compiles does -# not throw any compiler errors: +# subject for a different tutorial. To finish up, we'll verify no compiler errors are thrown +# by our project: shutil.rmtree(f"{FOLDER}/models/project/build", ignore_errors=True) # sphinx_gallery_start_ignore @@ -622,8 +628,8 @@ def representative_dataset(): # Other object results: # 0, 255 # -# The first number represents the model's confidence that the object **is** a car, and ranges from -# 0-255. The second number represents the model's confidence that the object **is not** a car, and +# The first number represents the model's confidence that the object **is** a car and ranges from +# 0-255. The second number represents the model's confidence that the object **is not** a car and # is also 0-255. These results mean the model is very sure that the first image is a car, and the # second image is not (which is correct). Hence, our model is working! # @@ -632,7 +638,7 @@ def representative_dataset(): # In this tutorial, we used transfer learning to quickly train an image recognition model to # identify cars. We modified its input dimensions and last few layers to make it better at this, # and to make it faster and smaller. We then quantified the model and compiled it using TVM to -# create an Arduino sketch. Lastly, we tested the model using two static images, to prove it works +# create an Arduino sketch. Lastly, we tested the model using two static images to prove it works # as intended. 
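
As a host-side counterpart to the on-device test described above, the same ``quantized.tflite``
file can be exercised with the TFLite interpreter before anything is flashed, and it should report
the same style of 0-255 scores that the Arduino prints over serial. This is a sketch under stated
assumptions: the image path is hypothetical, the input size follows the earlier sketches, and which
output index means "car" depends on how the classes were ordered during training.

.. code-block:: python

    import numpy as np
    import tensorflow as tf

    interpreter = tf.lite.Interpreter(model_path="quantized.tflite")
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]

    # Decode a test photo, resize it to the model's input size, and cast to uint8
    # to match the quantized input tensor.
    raw = tf.io.read_file("car_photo.jpg")  # hypothetical test image
    image = tf.image.resize(tf.io.decode_jpeg(raw, channels=3), (128, 128))
    batch = np.expand_dims(tf.cast(image, tf.uint8).numpy(), axis=0)

    interpreter.set_tensor(input_details["index"], batch)
    interpreter.invoke()
    scores = interpreter.get_tensor(output_details["index"])[0]

    # Both values are 0-255; the class order depends on how the training data was labeled.
    print("scores:", scores)
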
# # Next Steps From 14e5750c7a203f55941ef833df87e47837dcecf8 Mon Sep 17 00:00:00 2001 From: Gavin Uberti Date: Fri, 27 May 2022 14:06:49 -0400 Subject: [PATCH 12/14] Address code review comments Include full git hashes --- apps/microtvm/pyproject.toml | 2 +- docker/install/ubuntu_install_sphinx.sh | 2 +- tests/scripts/ci.py | 3 ++- tests/scripts/task_python_docs.sh | 3 ++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/apps/microtvm/pyproject.toml b/apps/microtvm/pyproject.toml index 59fdefc40ab3..597632859229 100644 --- a/apps/microtvm/pyproject.toml +++ b/apps/microtvm/pyproject.toml @@ -129,7 +129,7 @@ importer-tflite = ["tflite", "tensorflow", "tensorflow-estimator"] autodocsumm = "^0.1" black = "^19.10b0" sphinx = "^3.0" -sphinx-gallery = { git = "https://github.com/sphinx-gallery/sphinx-gallery.git", branch = "master" } +sphinx-gallery = { git = "https://github.com/sphinx-gallery/sphinx-gallery.git", rev = "6142f179" } sphinx-rtd-theme = "^0.4" matplotlib = "^3.2" Image = "^1.5" diff --git a/docker/install/ubuntu_install_sphinx.sh b/docker/install/ubuntu_install_sphinx.sh index 8092c3d1ea5a..96023fa6e633 100755 --- a/docker/install/ubuntu_install_sphinx.sh +++ b/docker/install/ubuntu_install_sphinx.sh @@ -29,5 +29,5 @@ pip3 install \ matplotlib \ sphinx==4.2.0 \ sphinx_autodoc_annotation \ - "git+https://github.com/sphinx-gallery/sphinx-gallery.git" \ + "git+https://github.com/sphinx-gallery/sphinx-gallery.git@6142f1791151849b5bec4bf3959f75697ba226cd" \ sphinx_rtd_theme diff --git a/tests/scripts/ci.py b/tests/scripts/ci.py index ed3bf401c8ad..52a9c6efbd22 100755 --- a/tests/scripts/ci.py +++ b/tests/scripts/ci.py @@ -260,7 +260,8 @@ def docs( "tlcpack-sphinx-addon==0.2.1", "synr==0.5.0", "image==1.5.33", - "git+https://github.com/sphinx-gallery/sphinx-gallery.git", + # Temporary git link until a release is published + "git+https://github.com/sphinx-gallery/sphinx-gallery.git@6142f1791151849b5bec4bf3959f75697ba226cd", "sphinx-rtd-theme==1.0.0", "matplotlib==3.3.4", "commonmark==0.9.1", diff --git a/tests/scripts/task_python_docs.sh b/tests/scripts/task_python_docs.sh index 72ef8a54c200..da1a2c9c5636 100755 --- a/tests/scripts/task_python_docs.sh +++ b/tests/scripts/task_python_docs.sh @@ -84,7 +84,8 @@ IGNORED_WARNINGS=( 'autotvm:Cannot find config for target=llvm -keys=cpu -link-params=0' 'autotvm:One or more operators have not been tuned. Please tune your model for better performance. Use DEBUG logging level to see more details.' 'autotvm:Cannot find config for target=cuda -keys=cuda,gpu' - 'absl:For model inputs containing unsupported operations' + # Warning is thrown during TFLite quantization for micro_train tutorial + 'absl:For model inputs containing unsupported operations which cannot be quantized, the `inference_input_type` attribute will default to the original type.' ) JOINED_WARNINGS=$(join_by '|' "${IGNORED_WARNINGS[@]}") From 5095a5c3995e978c3436b925033db7699ba6bfac Mon Sep 17 00:00:00 2001 From: Gavin Uberti Date: Fri, 3 Jun 2022 10:56:54 -0400 Subject: [PATCH 13/14] Rerun tests From ed96b7ec376ab87b8b65532e1f6b16d815f12fc4 Mon Sep 17 00:00:00 2001 From: Gavin Uberti Date: Fri, 3 Jun 2022 14:47:22 -0400 Subject: [PATCH 14/14] Rerun again
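
For reference, the ``IGNORED_WARNINGS``/``JOINED_WARNINGS`` change to ``task_python_docs.sh`` above
amounts to joining the ignore patterns into one alternation and dropping any build-log line that
matches it. A small Python mirror of that logic is shown below; the pattern list is abbreviated,
the log lines are made-up examples, and unlike the shell script the patterns are escaped here so
the regex stays literal.

.. code-block:: python

    import re

    # Abbreviated ignore list - the script joins many more entries with '|'.
    IGNORED_WARNINGS = [
        "autotvm:Cannot find config for target=cuda -keys=cuda,gpu",
        "absl:For model inputs containing unsupported operations",
    ]
    joined_warnings = "|".join(re.escape(pattern) for pattern in IGNORED_WARNINGS)

    log_lines = [
        "WARNING: absl:For model inputs containing unsupported operations ...",
        "WARNING: something that should fail the docs build",
    ]
    # Keep only warnings that are not on the ignore list.
    remaining = [line for line in log_lines if not re.search(joined_warnings, line)]
    print(remaining)  # ['WARNING: something that should fail the docs build']
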