diff --git a/docs/PythonPackage.md b/docs/PythonPackage.md
new file mode 100644
index 0000000000..038615ea3f
--- /dev/null
+++ b/docs/PythonPackage.md
@@ -0,0 +1,19 @@
+The Python package, onnxmlir, provides an installable interface for using the
+onnx-mlir compiler in a way similar to onnxruntime. The package also supports
+running a model in the same way as `utils/RunONNXModel.py`.
+
+The source of the package is located at `onnx-mlir/utils/onnxmlir`. The main Python code, `onnxmlir/src/onnxmlir/RunONNXModel.py`, should be the same as `onnx-mlir/utils/RunONNXModel.py`. You can use the target `OMCreateONNXMLIRSource` to create the installable directory in your build directory.
+The package can be installed from your local directory with `pip3 install your_path/onnx-mlir/build/utils/onnxmlir`.
+
+To build and publish the package, follow the instructions in https://packaging.python.org/en/latest/tutorials/packaging-projects/
+and run the following commands under the top directory onnxmlir:
+```
+python3 -m pip install --upgrade build
+python3 -m build
+# After obtaining the API token
+python3 -m pip install --upgrade twine
+python3 -m twine upload --repository testpypi dist/*
+```
+Unlike the packaging tutorial, the upload prompt asked only for the API token.
+
+Examples can be found at onnx-mlir/utils/onnxmlir/tests.
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
index 6d2df0f602..170222aa07 100644
--- a/utils/CMakeLists.txt
+++ b/utils/CMakeLists.txt
@@ -32,3 +32,8 @@ add_custom_target(OMONNXCheckVersion
 # Scan files for supported ops
 add_onnx_mlir_supported_ops(${ONNX_MLIR_SRC_ROOT}/test/backend/inference_backend.py cpu)
 add_onnx_mlir_supported_ops(${ONNX_MLIR_SRC_ROOT}/test/accelerators/NNPA/backend/CMakeLists.txt NNPA)
+
+add_custom_target(OMCreateONNXMLIRSource
+  COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/onnxmlir ${CMAKE_CURRENT_BINARY_DIR}
+  COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/RunONNXModel.py ${CMAKE_CURRENT_BINARY_DIR}/onnxmlir/src/onnxmlir)
+
diff --git a/utils/RunONNXModel.py b/utils/RunONNXModel.py
index d58b4fff56..017401c475 100755
--- a/utils/RunONNXModel.py
+++ b/utils/RunONNXModel.py
@@ -22,6 +22,7 @@
 import tempfile
 import json
 import importlib.util
+import shlex
 
 from onnx import numpy_helper
 from onnx.mapping import TENSOR_TYPE_TO_NP_TYPE
@@ -160,6 +161,9 @@ def check_non_negative(argname, value):
     help="Path to a folder containing reference inputs and outputs stored in protobuf."
     " If --verify=ref, inputs and outputs are reference data for verification",
 )
+data_group.add_argument(
+    "--inputs-from-arrays", help="List of numpy arrays used as inputs for inference"
+)
 data_group.add_argument(
     "--load-ref-from-numpy",
     metavar="PATH",
@@ -602,140 +606,216 @@ def data_without_top_bottom_quartile(data, percent):
     return data[trim:-trim]
 
 
-def main():
-    if not (args.model or args.load_so):
-        print("error: no input model, use argument --model and/or --load-so.")
-        print(parser.format_usage())
-        exit(1)
+class InferenceSession:
+    """
+    In onnxruntime:
+    class onnxruntime.InferenceSession(path_or_bytes: str | bytes | os.PathLike, sess_options: onnxruntime.SessionOptions | None = None, providers: Sequence[str | tuple[str, dict[Any, Any]]] | None = None, provider_options: Sequence[dict[Any, Any]] | None = None, **kwargs)[source]

-    # Get shape information if given.
- # args.shape_info in the form of 'input_index:d1xd2, input_index:d1xd2' - input_shapes = {} - if args.shape_info: - for input_shape in args.shape_info.strip().split(","): - input_index_shape = input_shape.split(":") - input_index = input_index_shape[0] - assert not (input_index in input_shapes), "Duplicate input indices" - dims = [int(d) for d in input_index_shape[1].split("x")] - input_shapes[int(input_index)] = dims - - # Load the onnx model. - if args.model and args.model.endswith(".onnx"): - model = onnx.load(args.model) - # Get names of all intermediate tensors and modify model such that each of - # them will be an output of the model. If using onnxruntime for - # verification, we can then verify every operation output. - output_names = [o.name for o in model.graph.output] - output_names = list(OrderedDict.fromkeys(output_names)) - if args.verify and args.verify == "onnxruntime" and args.verify_all_ops: - print("Extending the onnx model to check every node output ...\n") - output_names = sum( - [[n for n in node.output if n != ""] for node in model.graph.node], [] - ) + In onnxmlir, session_options and provider will be merged into kwargs, and + ignored. onnxruntime.SessionOptions may contain some useful info, + but onnxruntime package is needed to interpret it. Therefore, it is ignored now. + Another argument, 'options' is added for onnxmlir to specify options for RunONNXModel.py + """ + + def __init__(self, model_name, **kwargs): + global args + if "options" in kwargs.keys(): + options = kwargs["options"] + args = parser.parse_args(shlex.split(options)) + + if model_name: + if model_name.endswith(".onnx") or model_name.endswith(".mlir"): + args.model = model_name + else: + args.load_so = compiled_name + + # Get shape information if given. + # args.shape_info in the form of 'input_index:d1xd2, input_index:d1xd2' + input_shapes = {} + if args.shape_info: + for input_shape in args.shape_info.strip().split(","): + input_index_shape = input_shape.split(":") + input_index = input_index_shape[0] + assert not (input_index in input_shapes), "Duplicate input indices" + dims = [int(d) for d in input_index_shape[1].split("x")] + input_shapes[int(input_index)] = dims + + # Load the onnx model. + if args.model and args.model.endswith(".onnx"): + model = onnx.load(args.model) + # Get names of all intermediate tensors and modify model such that each of + # them will be an output of the model. If using onnxruntime for + # verification, we can then verify every operation output. + output_names = [o.name for o in model.graph.output] output_names = list(OrderedDict.fromkeys(output_names)) - model = extend_model_output(model, output_names) + if args.verify and args.verify == "onnxruntime" and args.verify_all_ops: + print("Extending the onnx model to check every node output ...\n") + output_names = sum( + [[n for n in node.output if n != ""] for node in model.graph.node], + [], + ) + output_names = list(OrderedDict.fromkeys(output_names)) + model = extend_model_output(model, output_names) - # Save the modified onnx file of the model if required. - if args.save_onnx: - print("Saving modified onnx model to ", args.save_onnx, "\n") - onnx.save(model, args.save_onnx) + # Save the modified onnx file of the model if required. + if args.save_onnx: + print("Saving modified onnx model to ", args.save_onnx, "\n") + onnx.save(model, args.save_onnx) - # Compile, run, and verify. 
- with tempfile.TemporaryDirectory() as temp_dir: - print("Temporary directory has been created at {}".format(temp_dir)) + # Compile, run, and verify. + with tempfile.TemporaryDirectory() as temp_dir: + print("Temporary directory has been created at {}".format(temp_dir)) - shared_lib_path = "" + shared_lib_path = "" - # If a shared library is given, use it without compiling the ONNX model. - # Otherwise, compile the ONNX model. - if args.load_so: - shared_lib_path = args.load_so - else: - print("Compiling the model ...") - # Prepare input and output paths. - output_path = os.path.join(temp_dir, "model") - shared_lib_path = os.path.join(temp_dir, "model.so") - if args.model.endswith(".onnx"): - input_model_path = os.path.join(temp_dir, "model.onnx") - onnx.save(model, input_model_path) - elif args.model.endswith(".mlir") or args.model.endswith(".onnxtext"): - input_model_path = args.model + # If a shared library is given, use it without compiling the ONNX model. + # Otherwise, compile the ONNX model. + if args.load_so: + shared_lib_path = args.load_so else: - print("Invalid input model path. Must end with .onnx or .mlir") - exit(1) + print("Compiling the model ...") + # Prepare input and output paths. + output_path = os.path.join(temp_dir, "model") + shared_lib_path = os.path.join(temp_dir, "model.so") + if args.model.endswith(".onnx"): + input_model_path = os.path.join(temp_dir, "model.onnx") + onnx.save(model, input_model_path) + elif args.model.endswith(".mlir") or args.model.endswith(".onnxtext"): + input_model_path = args.model + else: + print("Invalid input model path. Must end with .onnx or .mlir") + exit(1) - # Prepare compiler arguments. - command_str = [ONNX_MLIR] - if args.compile_args: - command_str += args.compile_args.split() - if args.compile_using_input_shape: - # Use shapes of the reference inputs to compile the model. - assert args.load_ref or args.load_ref_from_numpy, "No data folder given" - assert "shapeInformation" not in command_str, "shape info was set" - shape_info = "--shapeInformation=" - for i in range(len(inputs)): - shape_info += ( - str(i) + ":" + "x".join([str(d) for d in inputs[i].shape]) + "," + # Prepare compiler arguments. + command_str = [ONNX_MLIR] + if args.compile_args: + command_str += args.compile_args.split() + if args.compile_using_input_shape: + # Use shapes of the reference inputs to compile the model. + assert ( + args.load_ref or args.load_ref_from_numpy + ), "No data folder given" + assert "shapeInformation" not in command_str, "shape info was set" + shape_info = "--shapeInformation=" + for i in range(len(inputs)): + shape_info += ( + str(i) + + ":" + + "x".join([str(d) for d in inputs[i].shape]) + + "," + ) + shape_info = shape_info[:-1] + command_str += [shape_info] + warning( + "the shapes of the model's inputs will be " + "changed to the shapes of the inputs in the data folder" ) - shape_info = shape_info[:-1] - command_str += [shape_info] - warning( - "the shapes of the model's inputs will be " - "changed to the shapes of the inputs in the data folder" - ) - command_str += [input_model_path] - command_str += ["-o", output_path] + command_str += [input_model_path] + command_str += ["-o", output_path] - # Compile the model. - start = time.perf_counter() - ok, msg = execute_commands(command_str) - # Dump the compilation log into a file. 
- if args.log_to_file: - log_file = ( - args.log_to_file - if args.log_to_file.startswith("/") - else os.path.join(os.getcwd(), args.log_to_file) - ) - print(" Compilation log is dumped into {}".format(log_file)) - with open(log_file, "w") as f: - f.write(msg) - if not ok: - print(msg) - exit(1) - end = time.perf_counter() - print(" took ", end - start, " seconds.\n") + # Compile the model. + start = time.perf_counter() + ok, msg = execute_commands(command_str) + # Dump the compilation log into a file. + if args.log_to_file: + log_file = ( + args.log_to_file + if args.log_to_file.startswith("/") + else os.path.join(os.getcwd(), args.log_to_file) + ) + print(" Compilation log is dumped into {}".format(log_file)) + with open(log_file, "w") as f: + f.write(msg) + if not ok: + print(msg) + exit(1) + end = time.perf_counter() + print(" took ", end - start, " seconds.\n") - # Save the generated .so file of the model if required. - if args.save_so: - print("Saving the shared library to", args.save_so, "\n") - execute_commands(["rsync", "-ar", shared_lib_path, args.save_so]) + # Save the generated .so file of the model if required. + if args.save_so: + print("Saving the shared library to", args.save_so, "\n") + execute_commands(["rsync", "-ar", shared_lib_path, args.save_so]) - # Exit if only compiling the model. - if args.compile_only: - exit(0) + # Exit if only compiling the model. + if args.compile_only: + exit(0) - # Use the generated shared library to create an execution session. - print("Loading the compiled model ...") - start = time.perf_counter() - if args.load_so: - sess = OMExecutionSession(shared_lib_path, tag="None") - else: - sess = OMExecutionSession(shared_lib_path) - end = time.perf_counter() - print(" took ", end - start, " seconds.\n") + # Use the generated shared library to create an execution session. + print("Loading the compiled model ...") + start = time.perf_counter() + if args.load_so: + sess = OMExecutionSession(shared_lib_path, tag="None") + else: + sess = OMExecutionSession(shared_lib_path) + end = time.perf_counter() + print(" took ", end - start, " seconds.\n") + self.sess = sess + + """ + From onnxruntime API: + + run(output_names, input_feed, run_options=None) + Compute the predictions. + + PARAMETERS: + output_names – name of the outputs + input_feed – dictionary { input_name: input_value } + run_options – See onnxruntime.RunOptions. + RETURNS: + list of results, every result is either a numpy array, a sparse tensor, a list or a dictionary. + + For onnxmlir, the run_options is ignored. If 'input_feed' is None, the + input could be randomly generated or read from file, as args specified. + In future, add '--shape-info' here. Better than in InferenceSession to + allow different shape from run to run. + """ + + def run(self, outputname, input_feed, **kwargs): + # Get shape information if given. + # args.shape_info in the form of 'input_index:d1xd2, input_index:d1xd2' + input_shapes = {} + if args.shape_info: + for input_shape in args.shape_info.strip().split(","): + input_index_shape = input_shape.split(":") + input_index = input_index_shape[0] + assert not (input_index in input_shapes), "Duplicate input indices" + dims = [int(d) for d in input_index_shape[1].split("x")] + input_shapes[int(input_index)] = dims # Get the input and output signature. 
- input_signature = sess.input_signature() - output_signature = sess.output_signature() + input_signature = self.sess.input_signature() + output_signature = self.sess.output_signature() input_names = get_names_in_signature(input_signature) output_names = get_names_in_signature(output_signature) + inputs = [] + # Get input from input_feed, if input_feed is provided + if input_feed: + if isinstance(input_feed, dict): + for name in input_names: + if name in input_feed: + inputs.append(input_feed[name]) + else: + print("input name given: ", input_feed.keys()) + print("input name expected by model: ", input_names) + print("do not match") + exit(1) + # Since Python guarantees the order of values in a dictionary, + # the name check could be ignored as follows: + # inputs = list(input_feed.values()) + else: + inputs = input_feed + args.inputs_from_arrays = inputs + # Prepare input data. inputs = [] if args.load_ref: inputs = read_input_from_refs(len(input_names), args.load_ref) elif args.load_ref_from_numpy: inputs = read_input_from_refs(len(input_names), args.load_ref_from_numpy) + elif args.inputs_from_arrays: + inputs = args.inputs_from_arrays else: inputs = generate_random_input(input_signature, input_shapes) @@ -756,14 +836,14 @@ def main(): print("Running inference ...") for i in range(args.warmup): start = time.perf_counter() - outs = sess.run(inputs) + outs = self.sess.run(inputs) end = time.perf_counter() print(" {} warmup: {} seconds".format(ordinal(i + 1), end - start)) perf_results = [] for i in range(args.n_iteration): start = time.perf_counter() - outs = sess.run(inputs) + outs = self.sess.run(inputs) end = time.perf_counter() elapsed = end - start perf_results += [elapsed] @@ -868,6 +948,22 @@ def main(): "using atol={}, rtol={} ...".format(args.atol, args.rtol), ) verify_outs(outs[i], ref_outs[i]) + if outputname: + res = {outputname[i]: outs[i] for i in range(len(outs))} + return res + else: + return outs + + +# Standalone driver +def main(): + if not (args.model or args.load_so): + print("error: no input model, use argument --model and/or --load-so.") + print(parser.format_usage()) + exit(1) + + sess = InferenceSession(None) + return sess.run(None, None) if __name__ == "__main__": diff --git a/utils/onnxmlir/LICENSE b/utils/onnxmlir/LICENSE new file mode 100644 index 0000000000..20e4bd8566 --- /dev/null +++ b/utils/onnxmlir/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/utils/onnxmlir/README.md b/utils/onnxmlir/README.md new file mode 100644 index 0000000000..af1f8b18c0 --- /dev/null +++ b/utils/onnxmlir/README.md @@ -0,0 +1,68 @@ +This package provides a python interface to use onnx-mlir compiler to run inference of an onnx model similar to onnxruntime interface. The basic parameters of the interface are supported with options ignored. 
+
+## Description
+Let's start with [an onnxruntime example](https://onnxruntime.ai/docs/get-started/with-python#pytorch-cv):
+```
+import onnxruntime as ort
+import numpy as np
+x, y = test_data[0][0], test_data[0][1]
+ort_sess = ort.InferenceSession('fashion_mnist_model.onnx')
+outputs = ort_sess.run(None, {'input': x.numpy()})
+
+# Print Result
+predicted, actual = classes[outputs[0][0].argmax(0)], classes[y]
+print(f'Predicted: "{predicted}", Actual: "{actual}"')
+```
+
+With the onnxmlir package, onnx-mlir can be used to replace onnxruntime as follows:
+```
+import onnxmlir as ort
+import numpy as np
+x, y = test_data[0][0], test_data[0][1]
+ort_sess = ort.InferenceSession('fashion_mnist_model.onnx')
+outputs = ort_sess.run(None, {'input': x.numpy()})
+
+# Print Result
+predicted, actual = classes[outputs[0][0].argmax(0)], classes[y]
+print(f'Predicted: "{predicted}", Actual: "{actual}"')
+```
+
+In the current version, the onnx-mlir compiler is not contained in the Python
+package yet. Use the env variable ONNX_MLIR_HOME to specify the location of the onnx-mlir compiler to be used. For example, `export ONNX_MLIR_HOME=/mypath/onnx-mlir/build/Debug`.
+
+Another way to run the onnx model is to precompile it into a shared library first.
+```
+onnx-mlir -O3 fashion_mnist_model.onnx
+```
+
+This compilation will generate fashion_mnist_model.so. Then the library can be used as the model in the Python script as follows:
+```
+import onnxmlir as ort
+import numpy as np
+x, y = test_data[0][0], test_data[0][1]
+ort_sess = ort.InferenceSession('fashion_mnist_model.so')
+outputs = ort_sess.run(None, {'input': x.numpy()})
+
+# Print Result
+predicted, actual = classes[outputs[0][0].argmax(0)], classes[y]
+print(f'Predicted: "{predicted}", Actual: "{actual}"')
+```
+
+This package supports lists or dictionaries for inputs and outputs. For example, the input for run could be a list of tensors.
+```
+outputs = ort_sess.run(None, [a, b, c])
+```
+
+An extra named argument, `options`, is introduced for InferenceSession to specify the extra arguments accepted by onnx-mlir/utils/RunONNXModel.py. Here is an example:
+```
+sess = onnxmlir.InferenceSession("test_add.onnx", options='--compile-args="-O3 --parallel" --print-output')
+```
+
+## Installation
+
+### Install from local directory
+At the top of onnx-mlir: `pip3 install utils/onnxmlir`
+
+### Install from repo
+After the package is uploaded, you can install it with `pip3 install onnxmlir`
+
diff --git a/utils/onnxmlir/VERSION_NUMBER b/utils/onnxmlir/VERSION_NUMBER
new file mode 100644
index 0000000000..6e8bf73aa5
--- /dev/null
+++ b/utils/onnxmlir/VERSION_NUMBER
@@ -0,0 +1 @@
+0.1.0
diff --git a/utils/onnxmlir/pyproject.toml b/utils/onnxmlir/pyproject.toml
new file mode 100644
index 0000000000..096389e5ee
--- /dev/null
+++ b/utils/onnxmlir/pyproject.toml
@@ -0,0 +1,22 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[project]
+name = "onnxmlir"
+version = "0.1.0"
+authors = [
+    { name="Tong Chen", email="chentong@us.ibm.com" },
+    { name="Alexandre Eichenberger", email="alexe@us.ibm.com" },
+]
+description = "Python driver to compile/run onnx model with onnx-mlir"
+readme = "README.md"
+requires-python = ">=3.8"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: POSIX :: Linux",
+]
+
+[project.urls]
+Homepage = "https://github.com/onnx/onnx-mlir"
+Issues = "https://github.com/onnx/onnx-mlir/issues"
diff --git a/utils/onnxmlir/src/onnxmlir/__init__.py b/utils/onnxmlir/src/onnxmlir/__init__.py
new file mode 100644
index 0000000000..87080c1013
--- /dev/null
+++ b/utils/onnxmlir/src/onnxmlir/__init__.py
@@ -0,0 +1 @@
+from .RunONNXModel import InferenceSession
diff --git a/utils/onnxmlir/tests/test_1.py b/utils/onnxmlir/tests/test_1.py
new file mode 100644
index 0000000000..50564a6fde
--- /dev/null
+++ b/utils/onnxmlir/tests/test_1.py
@@ -0,0 +1,10 @@
+# Test: no names for inputs and outputs
+import numpy as np
+import onnxmlir
+
+a = np.zeros((3, 4, 5), dtype=np.float32)
+b = a + 4
+
+sess = onnxmlir.InferenceSession("test_add.onnx")
+r = sess.run(None, [a, b])
+print(r)
diff --git a/utils/onnxmlir/tests/test_2.py b/utils/onnxmlir/tests/test_2.py
new file mode 100644
index 0000000000..5cc56b2ca9
--- /dev/null
+++ b/utils/onnxmlir/tests/test_2.py
@@ -0,0 +1,11 @@
+import numpy as np
+import onnxmlir
+
+a0 = np.zeros((3, 4, 5), dtype=np.float32)
+a = a0 + 2
+b = a0 + 4
+
+sess = onnxmlir.InferenceSession("test_add.onnx")
+# The names for the inputs are specified by the onnx model.
+r = sess.run(["my_out"], {"x": a, "y": b})
+print(r)
diff --git a/utils/onnxmlir/tests/test_3.py b/utils/onnxmlir/tests/test_3.py
new file mode 100644
index 0000000000..d34defba09
--- /dev/null
+++ b/utils/onnxmlir/tests/test_3.py
@@ -0,0 +1,12 @@
+import numpy as np
+import onnxmlir
+
+a0 = np.zeros((3, 4, 5), dtype=np.float32)
+a = a0 + 2
+b = a0 + 4
+
+sess = onnxmlir.InferenceSession(
+    "test_add.onnx", options='--compile-args="-O3 --parallel" --print-output'
+)
+r = sess.run(["my_out"], {"x": a, "y": b})
+print(r)
diff --git a/utils/onnxmlir/tests/test_add.onnx b/utils/onnxmlir/tests/test_add.onnx
new file mode 100644
index 0000000000..8aa4ab2bc3
Binary files /dev/null and b/utils/onnxmlir/tests/test_add.onnx differ
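To see how the pieces added by this patch fit together, here is a minimal end-to-end sketch. It is illustrative only: it assumes `ONNX_MLIR_HOME` points at an onnx-mlir build as the README describes, and that `test_add.onnx` uses the input names `x` and `y` and the output name `my_out`, as in `tests/test_2.py`; the shapes and compile flags are just examples.
```
# Assumed setup (see docs/PythonPackage.md and the README above):
#   export ONNX_MLIR_HOME=/mypath/onnx-mlir/build/Debug
#   pip3 install utils/onnxmlir
import numpy as np
import onnxmlir

x = np.full((3, 4, 5), 2, dtype=np.float32)
y = np.full((3, 4, 5), 4, dtype=np.float32)

# 'options' forwards extra RunONNXModel.py flags, here a compile-time argument.
sess = onnxmlir.InferenceSession("test_add.onnx", options='--compile-args="-O3"')

# With a list of output names, run() returns a dict keyed by those names;
# passing None instead returns a plain list of numpy arrays (see tests/test_1.py).
res = sess.run(["my_out"], {"x": x, "y": y})
print(res["my_out"].shape)
```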