diff --git a/examples/ai-proxy-advanced/_3.8.x.yaml b/examples/ai-proxy-advanced/_3.8.x.yaml
new file mode 100644
index 0000000..54acb74
--- /dev/null
+++ b/examples/ai-proxy-advanced/_3.8.x.yaml
@@ -0,0 +1,15 @@
+name: ai-proxy-advanced
+config:
+  targets:
+  - route_type: llm/v1/chat
+    auth:
+      header_name: Authorization
+      header_value: Bearer token
+    model:
+      provider: openai
+  - route_type: llm/v1/chat
+    model:
+      provider: llama2
+      options:
+        llama2_format: openai
+        upstream_url: http://httpbin
diff --git a/schemas/ai-proxy-advanced/3.8.x.json b/schemas/ai-proxy-advanced/3.8.x.json
new file mode 100644
index 0000000..500ad14
--- /dev/null
+++ b/schemas/ai-proxy-advanced/3.8.x.json
@@ -0,0 +1,1127 @@
+{
+  "fields": [
+    {
+      "protocols": {
+        "required": true,
+        "elements": {
+          "one_of": [
+            "grpc",
+            "grpcs",
+            "http",
+            "https"
+          ],
+          "type": "string"
+        },
+        "type": "set",
+        "default": [
+          "grpc",
+          "grpcs",
+          "http",
+          "https"
+        ],
+        "description": "A set of strings representing HTTP protocols."
+      }
+    },
+    {
+      "config": {
+        "type": "record",
+        "fields": [
+          {
+            "balancer": {
+              "required": true,
+              "fields": [
+                {
+                  "algorithm": {
+                    "default": "round-robin",
+                    "type": "string",
+                    "one_of": [
+                      "round-robin",
+                      "lowest-latency",
+                      "lowest-usage",
+                      "consistent-hashing",
+                      "semantic"
+                    ],
+                    "description": "Which load balancing algorithm to use."
+                  }
+                },
+                {
+                  "tokens_count_strategy": {
+                    "default": "total-tokens",
+                    "type": "string",
+                    "one_of": [
+                      "total-tokens",
+                      "prompt-tokens",
+                      "completion-tokens"
+                    ],
+                    "description": "Which tokens to use for usage calculation. Available values are: `total-tokens`, `prompt-tokens`, and `completion-tokens`."
+                  }
+                },
+                {
+                  "latency_strategy": {
+                    "default": "tpot",
+                    "type": "string",
+                    "one_of": [
+                      "tpot",
+                      "e2e"
+                    ],
+                    "description": "Which metric to use for latency. Available values are: `tpot` (time-per-output-token) and `e2e` (end-to-end)."
+                  }
+                },
+                {
+                  "hash_on_header": {
+                    "type": "string",
+                    "default": "X-Kong-LLM-Request-ID",
+                    "description": "The header to use for consistent-hashing."
+                  }
+                },
+                {
+                  "slots": {
+                    "between": [
+                      10,
+                      65536
+                    ],
+                    "type": "integer",
+                    "default": 10000,
+                    "description": "The number of slots in the load balancer algorithm."
+                  }
+                },
+                {
+                  "retries": {
+                    "between": [
+                      0,
+                      32767
+                    ],
+                    "type": "integer",
+                    "default": 5,
+                    "description": "The number of retries to execute upon failure to proxy."
+                  }
+                },
+                {
+                  "connect_timeout": {
+                    "between": [
+                      1,
+                      2147483646
+                    ],
+                    "default": 60000,
+                    "type": "integer"
+                  }
+                },
+                {
+                  "write_timeout": {
+                    "between": [
+                      1,
+                      2147483646
+                    ],
+                    "default": 60000,
+                    "type": "integer"
+                  }
+                },
+                {
+                  "read_timeout": {
+                    "between": [
+                      1,
+                      2147483646
+                    ],
+                    "default": 60000,
+                    "type": "integer"
+                  }
+                }
+              ],
+              "type": "record"
+            }
+          },
+          {
+            "embeddings": {
+              "required": false,
+              "fields": [
+                {
+                  "auth": {
+                    "required": false,
+                    "fields": [
+                      {
+                        "password": {
+                          "required": false,
+                          "referenceable": true,
+                          "encrypted": true,
+                          "type": "string",
+                          "description": "Authentication password."
+                        }
+                      },
+                      {
+                        "token": {
+                          "required": false,
+                          "referenceable": true,
+                          "encrypted": true,
+                          "type": "string",
+                          "description": "Authentication token."
+                        }
+                      }
+                    ],
+                    "type": "record"
+                  }
+                },
+                {
+                  "provider": {
+                    "required": true,
+                    "type": "string",
+                    "one_of": [
+                      "mistralai",
+                      "openai"
+                    ],
+                    "description": "Which provider to use for embeddings."
+                  }
+                },
+                {
+                  "name": {
+                    "required": true,
+                    "type": "string",
+                    "one_of": [
+                      "text-embedding-3-large",
+                      "text-embedding-3-small",
+                      "mistral-embed"
+                    ],
+                    "description": "Which AI model to use for generating embeddings."
+                  }
+                },
+                {
+                  "upstream_url": {
+                    "type": "string",
+                    "required": false,
+                    "description": "Upstream URL for the embeddings."
+                  }
+                }
+              ],
+              "type": "record"
+            }
+          },
+          {
+            "vectordb": {
+              "required": false,
+              "fields": [
+                {
+                  "strategy": {
+                    "required": true,
+                    "type": "string",
+                    "one_of": [
+                      "redis"
+                    ],
+                    "description": "Which vector database driver to use."
+                  }
+                },
+                {
+                  "dimensions": {
+                    "type": "integer",
+                    "required": true,
+                    "description": "The desired dimensionality for the vectors."
+                  }
+                },
+                {
+                  "threshold": {
+                    "type": "number",
+                    "required": true,
+                    "description": "The default similarity threshold for accepting semantic search results (float)."
+                  }
+                },
+                {
+                  "distance_metric": {
+                    "required": true,
+                    "type": "string",
+                    "one_of": [
+                      "cosine",
+                      "euclidean"
+                    ],
+                    "description": "The distance metric to use for vector searches."
+                  }
+                },
+                {
+                  "redis": {
+                    "type": "record",
+                    "shorthand_fields": [
+                      {
+                        "timeout": {
+                          "deprecation": {
+                            "removal_in_version": "4.0",
+                            "message": "redis schema field `timeout` is deprecated, use `connect_timeout`, `send_timeout` and `read_timeout`"
+                          },
+                          "translate_backwards": [
+                            "connect_timeout"
+                          ],
+                          "type": "integer"
+                        }
+                      },
+                      {
+                        "sentinel_addresses": {
+                          "len_min": 1,
+                          "type": "array",
+                          "deprecation": {
+                            "removal_in_version": "4.0",
+                            "message": "sentinel_addresses is deprecated, please use sentinel_nodes instead"
+                          },
+                          "elements": {
+                            "type": "string"
+                          }
+                        }
+                      },
+                      {
+                        "cluster_addresses": {
+                          "len_min": 1,
+                          "type": "array",
+                          "deprecation": {
+                            "removal_in_version": "4.0",
+                            "message": "cluster_addresses is deprecated, please use cluster_nodes instead"
+                          },
+                          "elements": {
+                            "type": "string"
+                          }
+                        }
+                      }
+                    ],
+                    "entity_checks": [
+                      {
+                        "mutually_exclusive_sets": {
+                          "set1": [
+                            "sentinel_master",
+                            "sentinel_role",
+                            "sentinel_nodes"
+                          ],
+                          "set2": [
+                            "host",
+                            "port"
+                          ]
+                        }
+                      },
+                      {
+                        "mutually_exclusive_sets": {
+                          "set1": [
+                            "sentinel_master",
+                            "sentinel_role",
+                            "sentinel_nodes"
+                          ],
+                          "set2": [
+                            "cluster_nodes"
+                          ]
+                        }
+                      },
+                      {
+                        "mutually_exclusive_sets": {
+                          "set1": [
+                            "cluster_nodes"
+                          ],
+                          "set2": [
+                            "host",
+                            "port"
+                          ]
+                        }
+                      },
+                      {
+                        "mutually_required": [
+                          "sentinel_master",
+                          "sentinel_role",
+                          "sentinel_nodes"
+                        ]
+                      },
+                      {
+                        "mutually_required": [
+                          "host",
+                          "port"
+                        ]
+                      },
+                      {
+                        "mutually_required": [
"connect_timeout", + "send_timeout", + "read_timeout" + ] + } + ], + "fields": [ + { + "host": { + "description": "A string representing a host name, such as example.com.", + "type": "string" + } + }, + { + "port": { + "between": [ + 0, + 65535 + ], + "type": "integer", + "description": "An integer representing a port number between 0 and 65535, inclusive." + } + }, + { + "connect_timeout": { + "type": "integer", + "between": [ + 0, + 2147483646 + ], + "default": 2000, + "description": "An integer representing a timeout in milliseconds. Must be between 0 and 2^31-2." + } + }, + { + "send_timeout": { + "type": "integer", + "between": [ + 0, + 2147483646 + ], + "default": 2000, + "description": "An integer representing a timeout in milliseconds. Must be between 0 and 2^31-2." + } + }, + { + "read_timeout": { + "type": "integer", + "between": [ + 0, + 2147483646 + ], + "default": 2000, + "description": "An integer representing a timeout in milliseconds. Must be between 0 and 2^31-2." + } + }, + { + "username": { + "type": "string", + "referenceable": true, + "description": "Username to use for Redis connections. If undefined, ACL authentication won't be performed. This requires Redis v6.0.0+. To be compatible with Redis v5.x.y, you can set it to `default`." + } + }, + { + "password": { + "encrypted": true, + "type": "string", + "referenceable": true, + "description": "Password to use for Redis connections. If undefined, no AUTH commands are sent to Redis." + } + }, + { + "sentinel_username": { + "type": "string", + "referenceable": true, + "description": "Sentinel username to authenticate with a Redis Sentinel instance. If undefined, ACL authentication won't be performed. This requires Redis v6.2.0+." + } + }, + { + "sentinel_password": { + "encrypted": true, + "type": "string", + "referenceable": true, + "description": "Sentinel password to authenticate with a Redis Sentinel instance. If undefined, no AUTH commands are sent to Redis Sentinels." + } + }, + { + "database": { + "type": "integer", + "default": 0, + "description": "Database to use for the Redis connection when using the `redis` strategy" + } + }, + { + "keepalive_pool_size": { + "between": [ + 1, + 2147483646 + ], + "type": "integer", + "default": 256, + "description": "The size limit for every cosocket connection pool associated with every remote server, per worker process. If neither `keepalive_pool_size` nor `keepalive_backlog` is specified, no pool is created. If `keepalive_pool_size` isn't specified but `keepalive_backlog` is specified, then the pool uses the default value. Try to increase (e.g. 512) this value if latency is high or throughput is low." + } + }, + { + "keepalive_backlog": { + "type": "integer", + "between": [ + 0, + 2147483646 + ], + "description": "Limits the total number of opened connections for a pool. If the connection pool is full, connection queues above the limit go into the backlog queue. If the backlog queue is full, subsequent connect operations fail and return `nil`. Queued operations (subject to set timeouts) resume once the number of connections in the pool is less than `keepalive_pool_size`. If latency is high or throughput is low, try increasing this value. Empirically, this value is larger than `keepalive_pool_size`." + } + }, + { + "sentinel_master": { + "type": "string", + "description": "Sentinel master to use for Redis connections. Defining this value implies using Redis Sentinel." 
+                        }
+                      },
+                      {
+                        "sentinel_role": {
+                          "type": "string",
+                          "one_of": [
+                            "master",
+                            "slave",
+                            "any"
+                          ],
+                          "description": "Sentinel role to use for Redis connections when the `redis` strategy is defined. Defining this value implies using Redis Sentinel."
+                        }
+                      },
+                      {
+                        "sentinel_nodes": {
+                          "type": "array",
+                          "elements": {
+                            "fields": [
+                              {
+                                "host": {
+                                  "type": "string",
+                                  "required": true,
+                                  "default": "127.0.0.1",
+                                  "description": "A string representing a host name, such as example.com."
+                                }
+                              },
+                              {
+                                "port": {
+                                  "type": "integer",
+                                  "between": [
+                                    0,
+                                    65535
+                                  ],
+                                  "default": 6379,
+                                  "description": "An integer representing a port number between 0 and 65535, inclusive."
+                                }
+                              }
+                            ],
+                            "type": "record"
+                          },
+                          "len_min": 1,
+                          "required": false,
+                          "description": "Sentinel node addresses to use for Redis connections when the `redis` strategy is defined. Defining this field implies using a Redis Sentinel. The minimum length of the array is 1 element."
+                        }
+                      },
+                      {
+                        "cluster_nodes": {
+                          "type": "array",
+                          "elements": {
+                            "fields": [
+                              {
+                                "ip": {
+                                  "type": "string",
+                                  "required": true,
+                                  "default": "127.0.0.1",
+                                  "description": "A string representing an IP address or host name, such as 127.0.0.1 or example.com."
+                                }
+                              },
+                              {
+                                "port": {
+                                  "type": "integer",
+                                  "between": [
+                                    0,
+                                    65535
+                                  ],
+                                  "default": 6379,
+                                  "description": "An integer representing a port number between 0 and 65535, inclusive."
+                                }
+                              }
+                            ],
+                            "type": "record"
+                          },
+                          "len_min": 1,
+                          "required": false,
+                          "description": "Cluster addresses to use for Redis connections when the `redis` strategy is defined. Defining this field implies using a Redis Cluster. The minimum length of the array is 1 element."
+                        }
+                      },
+                      {
+                        "ssl": {
+                          "required": false,
+                          "type": "boolean",
+                          "default": false,
+                          "description": "If set to true, uses SSL to connect to Redis."
+                        }
+                      },
+                      {
+                        "ssl_verify": {
+                          "required": false,
+                          "type": "boolean",
+                          "default": false,
+                          "description": "If set to true, verifies the validity of the server SSL certificate. If setting this parameter, also configure `lua_ssl_trusted_certificate` in `kong.conf` to specify the CA (or server) certificate used by your Redis server. You may also need to configure `lua_ssl_verify_depth` accordingly."
+                        }
+                      },
+                      {
+                        "server_name": {
+                          "type": "string",
+                          "required": false,
+                          "description": "A string representing an SNI (server name indication) value for TLS."
+                        }
+                      },
+                      {
+                        "cluster_max_redirections": {
+                          "required": false,
+                          "type": "integer",
+                          "default": 5,
+                          "description": "Maximum retry attempts for redirection."
+                        }
+                      }
+                    ],
+                    "required": true
+                  }
+                }
+              ],
+              "type": "record"
+            }
+          },
+          {
+            "max_request_body_size": {
+              "type": "integer",
+              "gt": 0,
+              "default": 8192,
+              "description": "The maximum request body size allowed to be introspected."
+            }
+          },
+          {
+            "model_name_header": {
+              "type": "boolean",
+              "default": true,
+              "description": "Display the model name selected in the X-Kong-LLM-Model response header."
+            }
+          },
+          {
+            "targets": {
+              "type": "array",
+              "required": true,
+              "elements": {
+                "type": "record",
+                "fields": [
+                  {
+                    "route_type": {
+                      "required": true,
+                      "type": "string",
+                      "one_of": [
+                        "llm/v1/chat",
+                        "llm/v1/completions",
+                        "preserve"
+                      ],
+                      "description": "The model's operation implementation for this provider. Set to `preserve` to pass through without transformation."
+                    }
+                  },
+                  {
+                    "auth": {
+                      "required": false,
+                      "fields": [
+                        {
+                          "header_name": {
+                            "required": false,
+                            "type": "string",
+                            "referenceable": true,
+                            "description": "If the AI model requires authentication via an Authorization or API key header, specify its name here."
+                          }
+                        },
+                        {
+                          "header_value": {
+                            "type": "string",
+                            "referenceable": true,
+                            "description": "Specify the full auth header value for 'header_name', for example 'Bearer key' or just 'key'.",
+                            "required": false,
+                            "encrypted": true
+                          }
+                        },
+                        {
+                          "param_name": {
+                            "required": false,
+                            "type": "string",
+                            "referenceable": true,
+                            "description": "If the AI model requires authentication via a query parameter, specify its name here."
+                          }
+                        },
+                        {
+                          "param_value": {
+                            "type": "string",
+                            "referenceable": true,
+                            "description": "Specify the full parameter value for 'param_name'.",
+                            "required": false,
+                            "encrypted": true
+                          }
+                        },
+                        {
+                          "param_location": {
+                            "required": false,
+                            "type": "string",
+                            "one_of": [
+                              "query",
+                              "body"
+                            ],
+                            "description": "Specify whether the 'param_name' and 'param_value' options go in a query string, or the POST form/JSON body."
+                          }
+                        },
+                        {
+                          "azure_use_managed_identity": {
+                            "required": false,
+                            "type": "boolean",
+                            "default": false,
+                            "description": "Set to true to use the Azure Cloud Managed Identity (or user-assigned identity) to authenticate with Azure-provider models."
+                          }
+                        },
+                        {
+                          "azure_client_id": {
+                            "required": false,
+                            "type": "string",
+                            "referenceable": true,
+                            "description": "If azure_use_managed_identity is set to true, and you need to use a different user-assigned identity for this LLM instance, set the client ID."
+                          }
+                        },
+                        {
+                          "azure_client_secret": {
+                            "type": "string",
+                            "referenceable": true,
+                            "description": "If azure_use_managed_identity is set to true, and you need to use a different user-assigned identity for this LLM instance, set the client secret.",
+                            "required": false,
+                            "encrypted": true
+                          }
+                        },
+                        {
+                          "azure_tenant_id": {
+                            "required": false,
+                            "type": "string",
+                            "referenceable": true,
+                            "description": "If azure_use_managed_identity is set to true, and you need to use a different user-assigned identity for this LLM instance, set the tenant ID."
+                          }
+                        },
+                        {
+                          "gcp_use_service_account": {
+                            "required": false,
+                            "type": "boolean",
+                            "default": false,
+                            "description": "Use service account auth for GCP-based providers and models."
+                          }
+                        },
+                        {
+                          "gcp_service_account_json": {
+                            "type": "string",
+                            "referenceable": true,
+                            "description": "Set this field to the full JSON of the GCP service account to authenticate, if required. If null (and gcp_use_service_account is true), Kong will attempt to read from the environment variable `GCP_SERVICE_ACCOUNT`.",
+                            "required": false,
+                            "encrypted": true
+                          }
+                        },
+                        {
+                          "aws_access_key_id": {
+                            "type": "string",
+                            "referenceable": true,
+                            "description": "Set this if you are using an AWS provider (Bedrock) and you are authenticating using static IAM User credentials. Setting this will override the AWS_ACCESS_KEY_ID environment variable for this plugin instance.",
+                            "required": false,
+                            "encrypted": true
+                          }
+                        },
+                        {
+                          "aws_secret_access_key": {
+                            "type": "string",
+                            "referenceable": true,
+                            "description": "Set this if you are using an AWS provider (Bedrock) and you are authenticating using static IAM User credentials. Setting this will override the AWS_SECRET_ACCESS_KEY environment variable for this plugin instance.",
+                            "required": false,
+                            "encrypted": true
+                          }
+                        }
+                      ],
+                      "type": "record"
+                    }
+                  },
+                  {
+                    "model": {
+                      "required": true,
+                      "fields": [
+                        {
+                          "provider": {
+                            "required": true,
+                            "type": "string",
+                            "one_of": [
+                              "openai",
+                              "azure",
+                              "anthropic",
+                              "cohere",
+                              "mistral",
+                              "llama2",
+                              "gemini",
+                              "bedrock"
+                            ],
+                            "description": "AI provider request format. Kong translates requests to and from the specified backend-compatible formats."
+                          }
+                        },
+                        {
+                          "name": {
+                            "type": "string",
+                            "required": false,
+                            "description": "Model name to execute."
+                          }
+                        },
+                        {
+                          "options": {
+                            "required": false,
+                            "type": "record",
+                            "fields": [
+                              {
+                                "max_tokens": {
+                                  "required": false,
+                                  "type": "integer",
+                                  "default": 256,
+                                  "description": "Defines the max_tokens, if using chat or completion models."
+                                }
+                              },
+                              {
+                                "input_cost": {
+                                  "gt": 0,
+                                  "type": "number",
+                                  "required": false,
+                                  "description": "Defines the cost per 1M tokens in your prompt."
+                                }
+                              },
+                              {
+                                "output_cost": {
+                                  "gt": 0,
+                                  "type": "number",
+                                  "required": false,
+                                  "description": "Defines the cost per 1M tokens in the output of the AI."
+                                }
+                              },
+                              {
+                                "temperature": {
+                                  "between": [
+                                    0,
+                                    5
+                                  ],
+                                  "type": "number",
+                                  "required": false,
+                                  "description": "Defines the matching temperature, if using chat or completion models."
+                                }
+                              },
+                              {
+                                "top_p": {
+                                  "between": [
+                                    0,
+                                    1
+                                  ],
+                                  "type": "number",
+                                  "required": false,
+                                  "description": "Defines the top-p probability mass, if supported."
+                                }
+                              },
+                              {
+                                "top_k": {
+                                  "between": [
+                                    0,
+                                    500
+                                  ],
+                                  "type": "integer",
+                                  "required": false,
+                                  "description": "Defines the top-k most likely tokens, if supported."
+                                }
+                              },
+                              {
+                                "anthropic_version": {
+                                  "type": "string",
+                                  "required": false,
+                                  "description": "Defines the schema/API version, if using the Anthropic provider."
+                                }
+                              },
+                              {
+                                "azure_instance": {
+                                  "type": "string",
+                                  "required": false,
+                                  "description": "Instance name for Azure OpenAI hosted models."
+                                }
+                              },
+                              {
+                                "azure_api_version": {
+                                  "required": false,
+                                  "type": "string",
+                                  "default": "2023-05-15",
+                                  "description": "'api-version' for Azure OpenAI instances."
+                                }
+                              },
+                              {
+                                "azure_deployment_id": {
+                                  "type": "string",
+                                  "required": false,
+                                  "description": "Deployment ID for Azure OpenAI instances."
+                                }
+                              },
+                              {
+                                "llama2_format": {
+                                  "required": false,
+                                  "type": "string",
+                                  "one_of": [
+                                    "raw",
+                                    "openai",
+                                    "ollama"
+                                  ],
+                                  "description": "If using the llama2 provider, select the upstream message format."
+                                }
+                              },
+                              {
+                                "mistral_format": {
+                                  "required": false,
+                                  "type": "string",
+                                  "one_of": [
+                                    "openai",
+                                    "ollama"
+                                  ],
+                                  "description": "If using the mistral provider, select the upstream message format."
+                                }
+                              },
+                              {
+                                "upstream_url": {
+                                  "required": false,
+                                  "type": "string",
+                                  "description": "Manually specify or override the full URL to the AI operation endpoints, when calling (self-)hosted models, or for running via a private endpoint."
+                                }
+                              },
+                              {
+                                "upstream_path": {
+                                  "type": "string",
+                                  "required": false,
+                                  "description": "Manually specify or override the AI operation path, for example when using the 'preserve' route_type."
+                                }
+                              },
+                              {
+                                "gemini": {
+                                  "entity_checks": [
+                                    {
+                                      "mutually_required": [
+                                        "api_endpoint",
+                                        "project_id",
+                                        "location_id"
+                                      ]
+                                    }
+                                  ],
+                                  "required": false,
+                                  "fields": [
+                                    {
+                                      "api_endpoint": {
+                                        "type": "string",
+                                        "required": false,
+                                        "description": "If running Gemini on Vertex, specify the regional API endpoint (hostname only)."
+                                      }
+                                    },
+                                    {
+                                      "project_id": {
+                                        "type": "string",
+                                        "required": false,
+                                        "description": "If running Gemini on Vertex, specify the project ID."
+                                      }
+                                    },
+                                    {
+                                      "location_id": {
+                                        "type": "string",
+                                        "required": false,
+                                        "description": "If running Gemini on Vertex, specify the location ID."
+                                      }
+                                    }
+                                  ],
+                                  "type": "record"
+                                }
+                              },
+                              {
+                                "bedrock": {
+                                  "required": false,
+                                  "fields": [
+                                    {
+                                      "aws_region": {
+                                        "type": "string",
+                                        "required": false,
+                                        "description": "If using AWS providers (Bedrock), you can override the `AWS_REGION` environment variable by setting this option."
+                                      }
+                                    }
+                                  ],
+                                  "type": "record"
+                                }
+                              }
+                            ],
+                            "description": "Key/value settings for the model."
+                          }
+                        }
+                      ],
+                      "type": "record"
+                    }
+                  },
+                  {
+                    "weight": {
+                      "between": [
+                        1,
+                        65535
+                      ],
+                      "type": "integer",
+                      "default": 100,
+                      "description": "The weight this target gets within the upstream load balancer (1-65535)."
+                    }
+                  },
+                  {
+                    "description": {
+                      "type": "string",
+                      "required": false,
+                      "description": "The semantic description of the target; required if using semantic load balancing."
+                    }
+                  },
+                  {
+                    "logging": {
+                      "required": true,
+                      "fields": [
+                        {
+                          "log_statistics": {
+                            "required": true,
+                            "type": "boolean",
+                            "default": false,
+                            "description": "If enabled, and if supported by the driver, adds model usage and token metrics to the Kong log plugin(s) output."
+                          }
+                        },
+                        {
+                          "log_payloads": {
+                            "required": true,
+                            "type": "boolean",
+                            "default": false,
+                            "description": "If enabled, logs the request and response bodies to the Kong log plugin(s) output."
+                          }
+                        }
+                      ],
+                      "type": "record"
+                    }
+                  }
+                ],
+                "entity_checks": [
+                  {
+                    "mutually_required": [
+                      "auth.header_name",
+                      "auth.header_value"
+                    ]
+                  },
+                  {
+                    "mutually_required": [
+                      "auth.param_name",
+                      "auth.param_value",
+                      "auth.param_location"
+                    ]
+                  },
+                  {
+                    "conditional_at_least_one_of": {
+                      "then_at_least_one_of": [
+                        "model.options.llama2_format"
+                      ],
+                      "if_match": {
+                        "one_of": [
+                          "llama2"
+                        ]
+                      },
+                      "then_err": "must set %s for llama2 provider",
+                      "if_field": "model.provider"
+                    }
+                  },
+                  {
+                    "conditional_at_least_one_of": {
+                      "then_at_least_one_of": [
+                        "model.options.mistral_format"
+                      ],
+                      "if_match": {
+                        "one_of": [
+                          "mistral"
+                        ]
+                      },
+                      "then_err": "must set %s for mistral provider",
+                      "if_field": "model.provider"
+                    }
+                  },
+                  {
+                    "conditional_at_least_one_of": {
+                      "then_at_least_one_of": [
+                        "model.options.anthropic_version"
+                      ],
+                      "if_match": {
+                        "one_of": [
+                          "anthropic"
+                        ]
+                      },
+                      "then_err": "must set %s for anthropic provider",
+                      "if_field": "model.provider"
+                    }
+                  },
+                  {
+                    "conditional_at_least_one_of": {
+                      "then_at_least_one_of": [
+                        "model.options.azure_instance"
+                      ],
+                      "if_match": {
+                        "one_of": [
+                          "azure"
+                        ]
+                      },
+                      "then_err": "must set %s for azure provider",
+                      "if_field": "model.provider"
+                    }
+                  },
+                  {
+                    "conditional_at_least_one_of": {
+                      "then_at_least_one_of": [
+                        "model.options.azure_api_version"
+                      ],
+                      "if_match": {
+                        "one_of": [
+                          "azure"
+                        ]
+                      },
+                      "then_err": "must set %s for azure provider",
+                      "if_field": "model.provider"
+                    }
+                  },
+                  {
+                    "conditional_at_least_one_of": {
+                      "then_at_least_one_of": [
+                        "model.options.azure_deployment_id"
+                      ],
+                      "if_match": {
+                        "one_of": [
+                          "azure"
+                        ]
+                      },
+                      "then_err": "must set %s for azure provider",
+                      "if_field": "model.provider"
+                    }
+                  },
+                  {
+                    "conditional_at_least_one_of": {
+                      "then_at_least_one_of": [
+                        "model.options.upstream_url"
+                      ],
+                      "if_match": {
+                        "one_of": [
+                          "llama2"
+                        ]
+                      },
+                      "then_err": "must set %s for self-hosted providers/models",
+                      "if_field": "model.provider"
+                    }
+                  },
+                  {
"custom_entity_check": { + "field_sources": [ + "route_type", + "model", + "logging" + ] + } + } + ] + } + } + } + ], + "required": true + } + } + ], + "entity_checks": [ + { + "custom_entity_check": { + "field_sources": [ + "config.targets" + ] + } + }, + { + "custom_entity_check": { + "field_sources": [ + "config.targets", + "config.balancer" + ] + } + } + ] +} \ No newline at end of file