Skip to content

Commit

Permalink
Azure example terraform (#1274)
Browse files Browse the repository at this point in the history
* example terraform for azure

Signed-off-by: Jacob Klegar <jacob@tecton.ai>

* azure tf adjustments

Signed-off-by: Jacob Klegar <jacob@tecton.ai>

* more adjustments

Signed-off-by: Jacob Klegar <jacob@tecton.ai>

* changes to example notebook

Signed-off-by: Jacob Klegar <jacob@tecton.ai>

* add default aks namespace

Signed-off-by: Jacob Klegar <jacob@tecton.ai>

* remove extra print statement

Signed-off-by: Jacob Klegar <jacob@tecton.ai>
  • Loading branch information
jklegar committed Jan 27, 2021
1 parent c50c21c commit 057424b
Show file tree
Hide file tree
Showing 11 changed files with 418 additions and 0 deletions.
15 changes: 15 additions & 0 deletions examples/minimal/minimal_ride_hailing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,15 @@
" files = [\"s3://\" + path for path in fs.glob(uri + '/part-*')]\n",
" ds = ParquetDataset(files, filesystem=fs)\n",
" return ds.read().to_pandas()\n",
" elif parsed_uri.scheme == 'wasbs':\n",
" import adlfs\n",
" fs = adlfs.AzureBlobFileSystem(\n",
" account_name=os.getenv('FEAST_AZURE_BLOB_ACCOUNT_NAME'), account_key=os.getenv('FEAST_AZURE_BLOB_ACCOUNT_ACCESS_KEY')\n",
" )\n",
" uripath = parsed_uri.username + parsed_uri.path\n",
" files = fs.glob(uripath + '/part-*')\n",
" ds = ParquetDataset(files, filesystem=fs)\n",
" return ds.read().to_pandas()\n",
" else:\n",
" raise ValueError(f\"Unsupported URL scheme {uri}\")"
]
Expand Down Expand Up @@ -1275,6 +1284,12 @@
"metadata": {},
"outputs": [],
"source": [
"# Note: depending on the Kafka configuration you may need to create the Kafka topic first, like below:\n",
"#from confluent_kafka.admin import AdminClient, NewTopic\n",
"#admin = AdminClient({'bootstrap.servers': KAFKA_BROKER})\n",
"#new_topic = NewTopic('driver_trips', num_partitions=1, replication_factor=3)\n",
"#admin.create_topics([new_topic])\n",
"\n",
"for record in trips_df.drop(columns=['created']).to_dict('record'):\n",
" record[\"datetime\"] = (\n",
" record[\"datetime\"].to_pydatetime().replace(tzinfo=pytz.utc)\n",
Expand Down
36 changes: 36 additions & 0 deletions infra/terraform/azure/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Terraform config for Feast on Azure

This serves as a guide on how to deploy Feast on Azure. At the end of this guide, we will have provisioned:
1. AKS cluster
2. Feast services running on AKS
3. Azure Cache (Redis) as online store
4. Spark operator on AKS
5. Kafka running on HDInsight.

# Steps

1. Create a tfvars file, e.g. `my.tfvars`. A sample configuration is as below:

```
name_prefix = "feast09"
resource_group = "Feast" # pre-existing resource group
```

2. Configure tf state backend, e.g.:
```
terraform {
backend "azurerm" {
storage_account_name = "<your storage account name>"
container_name = "<your container name>"
key = "<your blob name>"
}
}
```

3. Use `terraform apply -var-file="my.tfvars"` to deploy.

Note: to get the list of Kafka brokers needed for streaming ingestion, use

`curl -sS -u <Kafka gateway username>:<Kafka gateway password> -G https://<Kafka cluster name>.azurehdinsight.net/api/v1/clusters/<Kafka cluster name>/services/KAFKA/components/KAFKA_BROKER | jq -r '["\(.host_components[].HostRoles.host_name):9092"] | join(",")'`

where the Kafka gateway username is <name_prefix>-kafka-gateway, the Kafka cluster name is <name_prefix>-kafka, and the Kafka gateway password is a kubectl secret under the name feast-kafka-gateway.
15 changes: 15 additions & 0 deletions infra/terraform/azure/aks.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# AKS cluster that hosts the Feast services, the Jupyter notebook, and the
# Spark operator.
resource "azurerm_kubernetes_cluster" "main" {
  name                = "${var.name_prefix}-aks"
  location            = data.azurerm_resource_group.main.location
  resource_group_name = data.azurerm_resource_group.main.name
  dns_prefix          = var.name_prefix
  default_node_pool {
    # NOTE(review): AKS node pool names must be short lowercase alphanumeric
    # strings — confirm var.name_prefix (e.g. "feast09") satisfies this.
    name       = var.name_prefix
    vm_size    = var.aks_machine_type
    node_count = var.aks_node_count
    # Place the nodes in the shared VNet so they can reach the Redis and
    # Kafka subnets defined elsewhere in this config.
    vnet_subnet_id = azurerm_subnet.main.id
  }
  identity {
    # Managed identity instead of an explicit service principal.
    type = "SystemAssigned"
  }
}
101 changes: 101 additions & 0 deletions infra/terraform/azure/helm.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
locals {
  # Name of the Kubernetes secret holding the generated PostgreSQL password.
  feast_postgres_secret_name = "${var.name_prefix}-postgres-secret"

  # Helm values for the Feast chart. The chart's bundled Redis/Grafana/Kafka
  # are disabled because Azure-managed equivalents (Azure Cache for Redis,
  # HDInsight Kafka) are provisioned by this Terraform config instead.
  # Attribute assignment is normalized to "=" throughout (the original mixed
  # "=" and ":" styles, both valid HCL but inconsistent).
  feast_helm_values = {
    redis = {
      enabled = false
    }

    grafana = {
      enabled = false
    }

    kafka = {
      enabled = false
    }

    postgresql = {
      existingSecret = local.feast_postgres_secret_name
    }

    feast-core = {
      postgresql = {
        existingSecret = local.feast_postgres_secret_name
      }
    }

    feast-online-serving = {
      enabled = true
      "application-override.yaml" = {
        feast = {
          core-host      = "${var.name_prefix}-feast-core"
          core-grpc-port = 6565
          active_store   = "online_store"
          # Online store points at the Azure Cache for Redis instance over
          # its TLS port (ssl = true).
          stores = [
            {
              name = "online_store"
              type = "REDIS"
              config = {
                host = azurerm_redis_cache.main.hostname
                port = azurerm_redis_cache.main.ssl_port
                ssl  = true
              }
            }
          ]
        }
      }
    }

    feast-jupyter = {
      enabled = true
      # Environment for the bundled Jupyter image: Redis connection, Spark
      # on Kubernetes launcher, and wasbs:// staging/output locations backed
      # by the storage account created in storage.tf.
      envOverrides = {
        feast_redis_host                         = azurerm_redis_cache.main.hostname
        feast_redis_port                         = azurerm_redis_cache.main.ssl_port
        feast_redis_ssl                          = true
        feast_spark_launcher                     = "k8s"
        feast_spark_staging_location             = "wasbs://${azurerm_storage_container.staging.name}@${azurerm_storage_account.main.name}.blob.core.windows.net/artifacts/"
        feast_historical_feature_output_location = "wasbs://${azurerm_storage_container.staging.name}@${azurerm_storage_account.main.name}.blob.core.windows.net/out/"
        feast_historical_feature_output_format   = "parquet"
        demo_data_location                       = "wasbs://${azurerm_storage_container.staging.name}@${azurerm_storage_account.main.name}.blob.core.windows.net/test-data/"
        feast_azure_blob_account_name            = azurerm_storage_account.main.name
        feast_azure_blob_account_access_key      = azurerm_storage_account.main.primary_access_key
      }
    }
  }
}

# Random password for the PostgreSQL instance deployed by the Feast chart.
# Special characters stay disabled so the value is safe to embed in
# connection strings; minimum character-class counts are added for
# consistency with the Kafka role password in kafka.tf, guaranteeing at
# least one upper-case letter, one lower-case letter and one digit.
resource "random_password" "feast-postgres-password" {
  length      = 16
  special     = false
  min_upper   = 1
  min_lower   = 1
  min_numeric = 1
}

# Secret consumed by the Feast chart via postgresql.existingSecret. It must
# live in the same namespace as the Helm release (helm_release.feast deploys
# into var.aks_namespace); without an explicit namespace the secret was
# created in "default" and the chart could not resolve it whenever
# var.aks_namespace was set to anything else.
resource "kubernetes_secret" "feast-postgres-secret" {
  metadata {
    name      = local.feast_postgres_secret_name
    namespace = var.aks_namespace
  }
  data = {
    postgresql-password = random_password.feast-postgres-password.result
  }
}

# Deploy the Feast chart from the local repository checkout with the values
# assembled in locals above. The explicit depends_on ensures the PostgreSQL
# secret exists before the chart tries to resolve existingSecret.
resource "helm_release" "feast" {
  depends_on = [kubernetes_secret.feast-postgres-secret]

  name      = var.name_prefix
  namespace = var.aks_namespace
  # Relative path into this repository's charts directory.
  chart = "../../charts/feast"

  values = [
    yamlencode(local.feast_helm_values)
  ]
}

# Spark operator used by Feast's "k8s" Spark launcher. Installed into the
# "default" namespace from the upstream spark-on-k8s-operator chart.
resource "helm_release" "sparkop" {
  name       = "sparkop"
  namespace  = "default"
  repository = "https://googlecloudplatform.github.io/spark-on-k8s-operator"
  chart      = "spark-operator"
  set {
    # Name of the service account the chart creates for Spark driver pods.
    name  = "serviceAccounts.spark.name"
    value = "spark"
  }
}
75 changes: 75 additions & 0 deletions infra/terraform/azure/kafka.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# HDInsight Kafka cluster used as the streaming source for Feast ingestion.
resource "azurerm_hdinsight_kafka_cluster" "main" {
  name                = "${var.name_prefix}-kafka"
  location            = data.azurerm_resource_group.main.location
  resource_group_name = data.azurerm_resource_group.main.name
  cluster_version     = "4.0"
  tier                = "Standard"

  component_version {
    kafka = "2.1"
  }

  # HTTPS gateway for the cluster's Ambari REST API — the README uses these
  # credentials to list the Kafka brokers.
  gateway {
    enabled  = true
    username = "${var.name_prefix}-kafka-gateway"
    password = random_password.feast-kafka-gateway-password.result
  }

  # Cluster storage backed by the shared storage account (see storage.tf).
  storage_account {
    is_default           = true
    storage_account_key  = azurerm_storage_account.main.primary_access_key
    storage_container_id = azurerm_storage_container.kafka.id
  }

  # All three node roles share one login and are placed in the dedicated
  # Kafka subnet of the shared VNet.
  roles {
    head_node {
      vm_size            = var.kafka_head_vm_size
      username           = "${var.name_prefix}-kafka-user"
      password           = random_password.feast-kafka-role-password.result
      subnet_id          = azurerm_subnet.kafka.id
      virtual_network_id = azurerm_virtual_network.main.id
    }
    worker_node {
      vm_size                  = var.kafka_worker_vm_size
      username                 = "${var.name_prefix}-kafka-user"
      password                 = random_password.feast-kafka-role-password.result
      number_of_disks_per_node = var.kafka_worker_disks_per_node
      target_instance_count    = var.kafka_worker_target_instance_count
      subnet_id                = azurerm_subnet.kafka.id
      virtual_network_id       = azurerm_virtual_network.main.id
    }
    zookeeper_node {
      vm_size            = var.kafka_zookeeper_vm_size
      username           = "${var.name_prefix}-kafka-user"
      password           = random_password.feast-kafka-role-password.result
      subnet_id          = azurerm_subnet.kafka.id
      virtual_network_id = azurerm_virtual_network.main.id
    }
  }
}

# Password for the HDInsight head/worker/zookeeper node logins. HDInsight
# requires cluster passwords to contain at least one upper-case letter, one
# lower-case letter, one digit AND one non-alphanumeric character, so special
# characters must be enabled here (the original had special = false, which
# could fail HDInsight's password validation at cluster creation). This also
# matches the gateway password resource below.
resource "random_password" "feast-kafka-role-password" {
  length      = 16
  special     = true
  min_upper   = 1
  min_lower   = 1
  min_special = 1
  min_numeric = 1
}

# Password for the HDInsight HTTPS (Ambari) gateway. Includes at least one
# character from each class, in line with HDInsight password complexity
# requirements. Exposed to users through the Kubernetes secret below.
resource "random_password" "feast-kafka-gateway-password" {
  length      = 16
  special     = true
  min_upper   = 1
  min_lower   = 1
  min_special = 1
  min_numeric = 1
}

# Publish the generated gateway password as a Kubernetes secret so users can
# retrieve it with kubectl, as described in the README.
# NOTE(review): no namespace is set, so this is created in the provider's
# default namespace — confirm that is where users expect to find it.
resource "kubernetes_secret" "feast-kafka-gateway-secret" {
  metadata {
    name = "feast-kafka-gateway"
  }
  data = {
    kafka-gateway-password = random_password.feast-kafka-gateway-password.result
  }
}
28 changes: 28 additions & 0 deletions infra/terraform/azure/provider.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Azure Resource Manager provider, pinned for reproducible plans.
provider "azurerm" {
  version  = "=2.40.0"
  features {}
}

# The Helm and Kubernetes providers authenticate directly against the AKS
# cluster's kubeconfig attributes, so no local kubeconfig file is needed
# (load_config_file = false). The connection settings are intentionally
# duplicated between the two providers: this helm provider version embeds
# its own kubernetes block and cannot reuse the standalone provider config.
provider "helm" {
  version = "~> 1.3.2"
  kubernetes {
    host                   = azurerm_kubernetes_cluster.main.kube_config.0.host
    username               = azurerm_kubernetes_cluster.main.kube_config.0.username
    password               = azurerm_kubernetes_cluster.main.kube_config.0.password
    client_certificate     = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_certificate)
    client_key             = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_key)
    cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.cluster_ca_certificate)
    load_config_file       = false
  }
}

provider "kubernetes" {
  version                = "~> 1.13.3"
  host                   = azurerm_kubernetes_cluster.main.kube_config.0.host
  username               = azurerm_kubernetes_cluster.main.kube_config.0.username
  password               = azurerm_kubernetes_cluster.main.kube_config.0.password
  client_certificate     = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_certificate)
  client_key             = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_key)
  cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.cluster_ca_certificate)
  load_config_file       = false
}
12 changes: 12 additions & 0 deletions infra/terraform/azure/redis.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Azure Cache for Redis used as the Feast online store. The Premium SKU is
# used so the cache can be deployed into a VNet subnet (subnet_id below).
resource "azurerm_redis_cache" "main" {
  name                = "${var.name_prefix}-redis"
  location            = data.azurerm_resource_group.main.location
  resource_group_name = data.azurerm_resource_group.main.name
  capacity            = var.redis_capacity
  family              = "P"
  sku_name            = "Premium"
  redis_configuration {
    # NOTE(review): Redis AUTH is disabled; access is limited only by VNet
    # placement. Confirm this is acceptable outside of a demo deployment.
    enable_authentication = false
  }
  subnet_id = azurerm_subnet.redis.id
}
27 changes: 27 additions & 0 deletions infra/terraform/azure/sparkop.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Namespaced Role allowing full management of SparkApplication custom
# resources (the CRD installed by the Spark operator) in the Feast namespace.
resource "kubernetes_role" "sparkop-user" {
  metadata {
    name      = "use-spark-operator"
    namespace = var.aks_namespace
  }
  rule {
    api_groups = ["sparkoperator.k8s.io"]
    resources  = ["sparkapplications"]
    verbs      = ["create", "delete", "deletecollection", "get", "list", "update", "watch", "patch"]
  }
}

# Bind the use-spark-operator Role to the "default" ServiceAccount so Feast
# jobs running under it can create SparkApplication resources.
resource "kubernetes_role_binding" "sparkop-user" {
  metadata {
    name      = "use-spark-operator"
    namespace = var.aks_namespace
  }
  role_ref {
    api_group = "rbac.authorization.k8s.io"
    kind      = "Role"
    name      = kubernetes_role.sparkop-user.metadata[0].name
  }
  subject {
    kind = "ServiceAccount"
    name = "default"
    # ServiceAccount subjects are namespaced. Without this, the binding
    # targets the "default" ServiceAccount in the "default" namespace rather
    # than the one in var.aks_namespace where the Role (and the Feast jobs)
    # actually live.
    namespace = var.aks_namespace
  }
}
21 changes: 21 additions & 0 deletions infra/terraform/azure/storage.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Shared storage account backing both the Feast staging container and the
# HDInsight Kafka cluster storage.
resource "azurerm_storage_account" "main" {
  # NOTE(review): storage account names must be 3-24 lowercase alphanumeric
  # characters — confirm "${var.name_prefix}storage" fits that limit.
  name                     = "${var.name_prefix}storage"
  resource_group_name      = data.azurerm_resource_group.main.name
  location                 = data.azurerm_resource_group.main.location
  account_kind             = "StorageV2"
  account_tier             = "Standard"
  account_replication_type = var.storage_account_replication_type
  # NOTE(review): combined with container_access_type = "blob" on the
  # containers below, this makes blob contents publicly readable. Acceptable
  # for demo data, but confirm before staging anything sensitive.
  allow_blob_public_access = true
}

# Container for Spark job artifacts, historical feature output and demo data
# (referenced via wasbs:// URLs in helm.tf).
resource "azurerm_storage_container" "staging" {
  name                 = "staging"
  storage_account_name = azurerm_storage_account.main.name
  # NOTE(review): "blob" grants anonymous public read access to blobs in
  # this container — confirm this is intended.
  container_access_type = "blob"
}

# Default storage container for the HDInsight Kafka cluster (see kafka.tf).
resource "azurerm_storage_container" "kafka" {
  name                 = "kafkastorage"
  storage_account_name = azurerm_storage_account.main.name
  # NOTE(review): "blob" grants anonymous public read access to blobs in
  # this container — confirm this is intended for cluster storage.
  container_access_type = "blob"
}
Loading

0 comments on commit 057424b

Please sign in to comment.