pkg/apis/serving/v1alpha2/swagger.json

{
  "swagger": "2.0",
  "info": {
    "description": "Python SDK for KFServing",
    "title": "KFServing",
    "version": "v0.1"
  },
  "paths": {},
  "definitions": {
    "knative.Addressable": {
      "type": "object",
      "properties": {
        "url": {
          "$ref": "#/definitions/knative.URL"
        }
      }
    },
    "knative.Condition": {
      "description": "Conditions defines a readiness condition for a Knative resource. See: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties",
      "type": "object",
      "required": [
        "type",
        "status"
      ],
      "properties": {
        "lastTransitionTime": {
          "description": "LastTransitionTime is the last time the condition transitioned from one status to another. We use VolatileTime in place of metav1.Time to exclude this from creating equality.Semantic differences (all other things held constant).",
          "$ref": "#/definitions/knative.VolatileTime"
        },
        "message": {
          "description": "A human readable message indicating details about the transition.",
          "type": "string"
        },
        "reason": {
          "description": "The reason for the condition's last transition.",
          "type": "string"
        },
        "severity": {
          "description": "Severity with which to treat failures of this type of condition. When this is not specified, it defaults to Error.",
          "type": "string"
        },
        "status": {
          "description": "Status of the condition, one of True, False, Unknown.",
          "type": "string"
        },
        "type": {
          "description": "Type of condition.",
          "type": "string"
        }
      }
    },
    "knative.URL": {
      "description": "URL is an alias of url.URL. It has custom json marshal methods that enable it to be used in K8s CRDs such that the CRD resource will have the URL but operator code can can work with url.URL struct",
      "type": "object",
      "required": [
        "Scheme",
        "Opaque",
        "User",
        "Host",
        "Path",
        "RawPath",
        "ForceQuery",
        "RawQuery",
        "Fragment"
      ],
      "properties": {
        "ForceQuery": {
          "description": "encoded path hint (see EscapedPath method)",
          "type": "boolean"
        },
        "Fragment": {
          "description": "encoded query values, without '?'",
          "type": "string"
        },
        "Host": {
          "description": "username and password information",
          "type": "string"
        },
        "Opaque": {
          "type": "string"
        },
        "Path": {
          "description": "host or host:port",
          "type": "string"
        },
        "RawPath": {
          "description": "path (relative paths may omit leading slash)",
          "type": "string"
        },
        "RawQuery": {
          "description": "append a query ('?') even if RawQuery is empty",
          "type": "string"
        },
        "Scheme": {
          "type": "string"
        },
        "User": {
          "description": "encoded opaque data",
          "$ref": "#/definitions/net.url.Userinfo"
        }
      }
    },
    "knative.VolatileTime": {
      "description": "VolatileTime wraps metav1.Time",
      "type": "object",
      "required": [
        "Time"
      ],
      "properties": {
        "Time": {
          "type": "string",
          "format": "date-time"
        }
      }
    },
    "v1alpha2.AIXExplainerSpec": {
      "description": "AIXExplainerSpec defines the arguments for configuring an AIX Explanation Server",
      "type": "object",
      "required": [
        "type"
      ],
      "properties": {
        "config": {
          "description": "Inline custom parameter settings for explainer",
          "type": "object",
          "additionalProperties": {
            "type": "string"
          }
        },
        "resources": {
          "description": "Defaults to requests and limits of 1CPU, 2Gb MEM.",
          "$ref": "#/definitions/v1.ResourceRequirements"
        },
        "runtimeVersion": {
          "description": "Defaults to latest AIX Version",
          "type": "string"
        },
        "storageUri": {
          "description": "The location of a trained explanation model",
          "type": "string"
        },
        "type": {
          "description": "The type of AIX explainer",
          "type": "string"
        }
      }
    },
    "v1alpha2.AlibiExplainerSpec": {
      "description": "AlibiExplainerSpec defines the arguments for configuring an Alibi Explanation Server",
      "type": "object",
      "required": [
        "type"
      ],
      "properties": {
        "config": {
          "description": "Inline custom parameter settings for explainer",
          "type": "object",
          "additionalProperties": {
            "type": "string"
          }
        },
        "resources": {
          "description": "Defaults to requests and limits of 1CPU, 2Gb MEM.",
          "$ref": "#/definitions/v1.ResourceRequirements"
        },
        "runtimeVersion": {
          "description": "Alibi docker image version which defaults to latest release",
          "type": "string"
        },
        "storageUri": {
          "description": "The location of a trained explanation model",
          "type": "string"
        },
        "type": {
          "description": "The type of Alibi explainer",
          "type": "string"
        }
      }
    },
    "v1alpha2.Batcher": {
      "description": "Batcher provides optional payload batcher for all endpoints",
      "type": "object",
      "properties": {
        "maxBatchSize": {
          "description": "MaxBatchSize of batcher service",
          "type": "integer",
          "format": "int32"
        },
        "maxLatency": {
          "description": "MaxLatency of batcher service",
          "type": "integer",
          "format": "int32"
        },
        "timeout": {
          "description": "Timeout of batcher service",
          "type": "integer",
          "format": "int32"
        }
      }
    },
    "v1alpha2.CustomSpec": {
      "description": "CustomSpec provides a hook for arbitrary container configuration.",
      "type": "object",
      "required": [
        "container"
      ],
      "properties": {
        "container": {
          "$ref": "#/definitions/v1.Container"
        }
      }
    },
    "v1alpha2.DeploymentSpec": {
      "description": "DeploymentSpec defines the configuration for a given InferenceService service component",
      "type": "object",
      "properties": {
        "batcher": {
          "description": "Activate request batching",
          "$ref": "#/definitions/v1alpha2.Batcher"
        },
        "logger": {
          "description": "Activate request/response logging",
          "$ref": "#/definitions/v1alpha2.Logger"
        },
        "maxReplicas": {
          "description": "This is the up bound for autoscaler to scale to",
          "type": "integer",
          "format": "int32"
        },
        "minReplicas": {
          "description": "Minimum number of replicas which defaults to 1, when minReplicas = 0 pods scale down to 0 in case of no traffic",
          "type": "integer",
          "format": "int32"
        },
        "parallelism": {
          "description": "Parallelism specifies how many requests can be processed concurrently, this sets the hard limit of the container concurrency(https://knative.dev/docs/serving/autoscaling/concurrency).",
          "type": "integer",
          "format": "int32"
        },
        "serviceAccountName": {
          "description": "ServiceAccountName is the name of the ServiceAccount to use to run the service",
          "type": "string"
        }
      }
    },
    "v1alpha2.EndpointSpec": {
      "type": "object",
      "required": [
        "predictor"
      ],
      "properties": {
        "explainer": {
          "description": "Explainer defines the model explanation service spec, explainer service calls to predictor or transformer if it is specified.",
          "$ref": "#/definitions/v1alpha2.ExplainerSpec"
        },
        "predictor": {
          "description": "Predictor defines the model serving spec",
          "$ref": "#/definitions/v1alpha2.PredictorSpec"
        },
        "transformer": {
          "description": "Transformer defines the pre/post processing before and after the predictor call, transformer service calls to predictor service.",
          "$ref": "#/definitions/v1alpha2.TransformerSpec"
        }
      }
    },
    "v1alpha2.ExplainerSpec": {
      "description": "ExplainerSpec defines the arguments for a model explanation server, The following fields follow a \"1-of\" semantic. Users must specify exactly one spec.",
      "type": "object",
      "properties": {
        "aix": {
          "description": "Spec for AIX explainer",
          "$ref": "#/definitions/v1alpha2.AIXExplainerSpec"
        },
        "alibi": {
          "description": "Spec for alibi explainer",
          "$ref": "#/definitions/v1alpha2.AlibiExplainerSpec"
        },
        "batcher": {
          "description": "Activate request batching",
          "$ref": "#/definitions/v1alpha2.Batcher"
        },
        "custom": {
          "description": "Spec for a custom explainer",
          "$ref": "#/definitions/v1alpha2.CustomSpec"
        },
        "logger": {
          "description": "Activate request/response logging",
          "$ref": "#/definitions/v1alpha2.Logger"
        },
        "maxReplicas": {
          "description": "This is the up bound for autoscaler to scale to",
          "type": "integer",
          "format": "int32"
        },
        "minReplicas": {
          "description": "Minimum number of replicas which defaults to 1, when minReplicas = 0 pods scale down to 0 in case of no traffic",
          "type": "integer",
          "format": "int32"
        },
        "parallelism": {
          "description": "Parallelism specifies how many requests can be processed concurrently, this sets the hard limit of the container concurrency(https://knative.dev/docs/serving/autoscaling/concurrency).",
          "type": "integer",
          "format": "int32"
        },
        "serviceAccountName": {
          "description": "ServiceAccountName is the name of the ServiceAccount to use to run the service",
          "type": "string"
        }
      }
    },
    "v1alpha2.InferenceService": {
      "description": "InferenceService is the Schema for the services API",
      "type": "object",
      "properties": {
        "apiVersion": {
          "description": "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources",
          "type": "string"
        },
        "kind": {
          "description": "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds",
          "type": "string"
        },
        "metadata": {
          "$ref": "#/definitions/v1.ObjectMeta"
        },
        "spec": {
          "$ref": "#/definitions/v1alpha2.InferenceServiceSpec"
        },
        "status": {
          "$ref": "#/definitions/v1alpha2.InferenceServiceStatus"
        }
      }
    },
    "v1alpha2.InferenceServiceList": {
      "description": "InferenceServiceList contains a list of Service",
      "type": "object",
      "required": [
        "items"
      ],
      "properties": {
        "apiVersion": {
          "description": "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources",
          "type": "string"
        },
        "items": {
          "type": "array",
          "items": {
            "$ref": "#/definitions/v1alpha2.InferenceService"
          },
          "x-kubernetes-list-type": "set"
        },
        "kind": {
          "description": "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds",
          "type": "string"
        },
        "metadata": {
          "$ref": "#/definitions/v1.ListMeta"
        }
      }
    },
    "v1alpha2.InferenceServiceSpec": {
      "description": "InferenceServiceSpec defines the desired state of InferenceService",
      "type": "object",
      "required": [
        "default"
      ],
      "properties": {
        "canary": {
          "description": "Canary defines alternate endpoints to route a percentage of traffic.",
          "$ref": "#/definitions/v1alpha2.EndpointSpec"
        },
        "canaryTrafficPercent": {
          "description": "CanaryTrafficPercent defines the percentage of traffic going to canary InferenceService endpoints",
          "type": "integer",
          "format": "int32"
        },
        "default": {
          "description": "Default defines default InferenceService endpoints",
          "$ref": "#/definitions/v1alpha2.EndpointSpec"
        }
      }
    },
    "v1alpha2.InferenceServiceStatus": {
      "description": "InferenceServiceStatus defines the observed state of InferenceService",
      "type": "object",
      "properties": {
        "address": {
          "description": "Addressable URL for eventing",
          "$ref": "#/definitions/knative.Addressable"
        },
        "annotations": {
          "description": "Annotations is additional Status fields for the Resource to save some additional State as well as convey more information to the user. This is roughly akin to Annotations on any k8s resource, just the reconciler conveying richer information outwards.",
          "type": "object",
          "additionalProperties": {
            "type": "string"
          }
        },
        "canary": {
          "description": "Statuses for the canary endpoints of the InferenceService",
          "type": "object",
          "additionalProperties": {
            "$ref": "#/definitions/v1alpha2.StatusConfigurationSpec"
          }
        },
        "canaryTraffic": {
          "description": "Traffic percentage that goes to canary services",
          "type": "integer",
          "format": "int32"
        },
        "conditions": {
          "description": "Conditions the latest available observations of a resource's current state.",
          "type": "array",
          "items": {
            "$ref": "#/definitions/knative.Condition"
          },
          "x-kubernetes-patch-merge-key": "type",
          "x-kubernetes-patch-strategy": "merge"
        },
        "default": {
          "description": "Statuses for the default endpoints of the InferenceService",
          "type": "object",
          "additionalProperties": {
            "$ref": "#/definitions/v1alpha2.StatusConfigurationSpec"
          }
        },
        "observedGeneration": {
          "description": "ObservedGeneration is the 'Generation' of the Service that was last processed by the controller.",
          "type": "integer",
          "format": "int64"
        },
        "traffic": {
          "description": "Traffic percentage that goes to default services",
          "type": "integer",
          "format": "int32"
        },
        "url": {
          "description": "URL of the InferenceService",
          "type": "string"
        }
      }
    },
    "v1alpha2.Logger": {
      "description": "Logger provides optional payload logging for all endpoints",
      "type": "object",
      "properties": {
        "mode": {
          "description": "What payloads to log: [all, request, response]",
          "type": "string"
        },
        "url": {
          "description": "URL to send request logging CloudEvents",
          "type": "string"
        }
      }
    },
    "v1alpha2.ONNXSpec": {
      "description": "ONNXSpec defines arguments for configuring ONNX model serving.",
      "type": "object",
      "required": [
        "storageUri"
      ],
      "properties": {
        "resources": {
          "description": "Defaults to requests and limits of 1CPU, 2Gb MEM.",
          "$ref": "#/definitions/v1.ResourceRequirements"
        },
        "runtimeVersion": {
          "description": "ONNXRuntime docker image versions, default version can be set in the inferenceservice configmap",
          "type": "string"
        },
        "storageUri": {
          "description": "The URI of the exported onnx model(model.onnx)",
          "type": "string"
        }
      }
    },
    "v1alpha2.PredictorSpec": {
      "description": "PredictorSpec defines the configuration for a predictor, The following fields follow a \"1-of\" semantic. Users must specify exactly one spec.",
      "type": "object",
      "properties": {
        "batcher": {
          "description": "Activate request batching",
          "$ref": "#/definitions/v1alpha2.Batcher"
        },
        "custom": {
          "description": "Spec for a custom predictor",
          "$ref": "#/definitions/v1alpha2.CustomSpec"
        },
        "logger": {
          "description": "Activate request/response logging",
          "$ref": "#/definitions/v1alpha2.Logger"
        },
        "maxReplicas": {
          "description": "This is the up bound for autoscaler to scale to",
          "type": "integer",
          "format": "int32"
        },
        "minReplicas": {
          "description": "Minimum number of replicas which defaults to 1, when minReplicas = 0 pods scale down to 0 in case of no traffic",
          "type": "integer",
          "format": "int32"
        },
        "onnx": {
          "description": "Spec for ONNX runtime (https://github.com/microsoft/onnxruntime)",
          "$ref": "#/definitions/v1alpha2.ONNXSpec"
        },
        "parallelism": {
          "description": "Parallelism specifies how many requests can be processed concurrently, this sets the hard limit of the container concurrency(https://knative.dev/docs/serving/autoscaling/concurrency).",
          "type": "integer",
          "format": "int32"
        },
        "pytorch": {
          "description": "Spec for PyTorch predictor",
          "$ref": "#/definitions/v1alpha2.PyTorchSpec"
        },
        "serviceAccountName": {
          "description": "ServiceAccountName is the name of the ServiceAccount to use to run the service",
          "type": "string"
        },
        "sklearn": {
          "description": "Spec for SKLearn predictor",
          "$ref": "#/definitions/v1alpha2.SKLearnSpec"
        },
        "pmml": {
          "description": "Spec for PMML predictor",
          "$ref": "#/definitions/v1alpha2.PMMLSpec"
        },
        "tensorflow": {
          "description": "Spec for Tensorflow Serving (https://github.com/tensorflow/serving)",
          "$ref": "#/definitions/v1alpha2.TensorflowSpec"
        },
        "triton": {
          "description": "Spec for Triton Inference Server (https://github.com/triton-inference-server/server)",
          "$ref": "#/definitions/v1alpha2.TritonSpec"
        },
        "xgboost": {
          "description": "Spec for XGBoost predictor",
          "$ref": "#/definitions/v1alpha2.XGBoostSpec"
        }
      }
    },
    "v1alpha2.PyTorchSpec": {
      "description": "PyTorchSpec defines arguments for configuring PyTorch model serving.",
      "type": "object",
      "required": [
        "storageUri"
      ],
      "properties": {
        "modelClassName": {
          "description": "Defaults PyTorch model class name to 'PyTorchModel'",
          "type": "string"
        },
        "resources": {
          "description": "Defaults to requests and limits of 1CPU, 2Gb MEM.",
          "$ref": "#/definitions/v1.ResourceRequirements"
        },
        "runtimeVersion": {
          "description": "PyTorch KFServer docker image version which defaults to latest release",
          "type": "string"
        },
        "storageUri": {
          "description": "The URI of the trained model which contains model.pt",
          "type": "string"
        }
      }
    },
    "v1alpha2.SKLearnSpec": {
      "description": "SKLearnSpec defines arguments for configuring SKLearn model serving.",
      "type": "object",
      "required": [
        "storageUri"
      ],
      "properties": {
        "resources": {
          "description": "Defaults to requests and limits of 1CPU, 2Gb MEM.",
          "$ref": "#/definitions/v1.ResourceRequirements"
        },
        "runtimeVersion": {
          "description": "SKLearn KFServer docker image version which defaults to latest release",
          "type": "string"
        },
        "storageUri": {
          "description": "The URI of the trained model which contains model.pickle, model.pkl or model.joblib",
          "type": "string"
        }
      }
    },
    "v1alpha2.PMMLSpec": {
      "description": "PMMLSpec defines arguments for configuring PMML model serving.",
      "type": "object",
      "required": [
        "storageUri"
      ],
      "properties": {
        "resources": {
          "description": "Defaults to requests and limits of 1CPU, 2Gb MEM.",
          "$ref": "#/definitions/v1.ResourceRequirements"
        },
        "runtimeVersion": {
          "description": "PMML KFServer docker image version which defaults to latest release",
          "type": "string"
        },
        "storageUri": {
          "description": "The URI of the trained model which contains model.pickle, model.pkl or model.joblib",
          "type": "string"
        }
      }
    },
    "v1alpha2.StatusConfigurationSpec": {
      "description": "StatusConfigurationSpec describes the state of the configuration receiving traffic.",
      "type": "object",
      "properties": {
        "host": {
          "description": "Host name of the service",
          "type": "string"
        },
        "name": {
          "description": "Latest revision name that is in ready state",
          "type": "string"
        }
      }
    },
    "v1alpha2.TensorflowSpec": {
      "description": "TensorflowSpec defines arguments for configuring Tensorflow model serving.",
      "type": "object",
      "required": [
        "storageUri"
      ],
      "properties": {
        "resources": {
          "description": "Defaults to requests and limits of 1CPU, 2Gb MEM.",
          "$ref": "#/definitions/v1.ResourceRequirements"
        },
        "runtimeVersion": {
          "description": "TFServing docker image version(https://hub.docker.com/r/tensorflow/serving), default version can be set in the inferenceservice configmap.",
          "type": "string"
        },
        "storageUri": {
          "description": "The URI for the saved model(https://www.tensorflow.org/tutorials/keras/save_and_load)",
          "type": "string"
        }
      }
    },
    "v1alpha2.TransformerSpec": {
      "description": "TransformerSpec defines transformer service for pre/post processing",
      "type": "object",
      "properties": {
        "batcher": {
          "description": "Activate request batching",
          "$ref": "#/definitions/v1alpha2.Batcher"
        },
        "custom": {
          "description": "Spec for a custom transformer",
          "$ref": "#/definitions/v1alpha2.CustomSpec"
        },
        "logger": {
          "description": "Activate request/response logging",
          "$ref": "#/definitions/v1alpha2.Logger"
        },
        "maxReplicas": {
          "description": "This is the up bound for autoscaler to scale to",
          "type": "integer",
          "format": "int32"
        },
        "minReplicas": {
          "description": "Minimum number of replicas which defaults to 1, when minReplicas = 0 pods scale down to 0 in case of no traffic",
          "type": "integer",
          "format": "int32"
        },
        "parallelism": {
          "description": "Parallelism specifies how many requests can be processed concurrently, this sets the hard limit of the container concurrency(https://knative.dev/docs/serving/autoscaling/concurrency).",
          "type": "integer",
          "format": "int32"
        },
        "serviceAccountName": {
          "description": "ServiceAccountName is the name of the ServiceAccount to use to run the service",
          "type": "string"
        }
      }
    },
    "v1alpha2.TritonSpec": {
      "description": "TritonSpec defines arguments for configuring Triton Inference Server.",
      "type": "object",
      "required": [
        "storageUri"
      ],
      "properties": {
        "resources": {
          "description": "Defaults to requests and limits of 1CPU, 2Gb MEM.",
          "$ref": "#/definitions/v1.ResourceRequirements"
        },
        "runtimeVersion": {
          "description": "Triton Inference Server docker image version, default version can be set in the inferenceservice configmap",
          "type": "string"
        },
        "storageUri": {
          "description": "The URI for the trained model repository(https://docs.nvidia.com/deeplearning/triton-inference-server/master-user-guide/docs/model_repository.html)",
          "type": "string"
        }
      }
    },
    "v1alpha2.XGBoostSpec": {
      "description": "XGBoostSpec defines arguments for configuring XGBoost model serving.",
      "type": "object",
      "required": [
        "storageUri"
      ],
      "properties": {
        "nthread": {
          "description": "Number of thread to be used by XGBoost",
          "type": "integer",
          "format": "int32"
        },
        "resources": {
          "description": "Defaults to requests and limits of 1CPU, 2Gb MEM.",
          "$ref": "#/definitions/v1.ResourceRequirements"
        },
        "runtimeVersion": {
          "description": "XGBoost KFServer docker image version which defaults to latest release",
          "type": "string"
        },
        "storageUri": {
          "description": "The URI of the trained model which contains model.bst",
          "type": "string"
        }
      }
    }
  }
}