Skip to content

Evefan Configuration Guide

Amadeo Pellicce edited this page Sep 23, 2024 · 8 revisions

This guide explains how to configure Evefan open-source workers using the config.json file. The config.json file should be placed in the root directory of your repository.

Configuration Structure

The config.json file should contain a JSON object with the following top-level properties:

  • batch: Batch processing configuration
  • deploy: Deployment configuration
  • queue: Queue configuration
  • sources: Array of data sources
  • destinations: Array of data destinations

Batch Configuration

The batch object contains settings for batch processing:

  • batchSize: Number of items to process in each batch
  • flushIntervalMs: Interval in milliseconds between batch flushes. Keep this at or below 10000 (10 seconds); longer intervals risk losing data.

Deploy Configuration

The deploy object specifies deployment settings:

  • scriptName: Name of the script to deploy
  • type: Deployment environment type (currently only "cloudflare" is supported)
  • credentials: Credentials for the deployment environment
  • environmentId: ID of the deployment environment
  • environmentSecret: Secret for the deployment environment

Queue Configuration

The queue object configures the queue processing:

  • batchSize: Number of items to process in each queue batch
  • maxRetries: Maximum number of retry attempts for failed items
  • maxWaitTimeMs: Maximum wait time in milliseconds
  • maxConcurrency: Maximum number of concurrent queue operations
  • type: Queue type (currently only "cloudflare" is supported)
  • credentials: Credentials for the queue service

Sources

The sources array contains objects representing data sources:

  • id: Unique identifier for the source
  • name: Name of the source
  • writeKey: Write key for authenticating and sending data to the source

Destinations

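The destinations array contains objects representing data destinations. As the example below shows, each destination has an id, name, type, config, enabled flag, and created/updated timestamps. The contents of config (including its _secret_credentials object) are specific to the destination type.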

Full Example

Here's a complete example of a config.json file:

{
  "batch": {
    "batchSize": 100,
    "flushIntervalMs": 5000
  },
  "deploy": {
    "scriptName": "my-data-pipeline",
    "type": "cloudflare",
    "credentials": {
      "accountId": "your-cloudflare-account-id",
      "apiKey": "your-cloudflare-api-key"
    },
    "environmentId": "your-environment-id",
    "environmentSecret": "your-environment-secret"
  },
  "queue": {
    "batchSize": 50,
    "maxRetries": 3,
    "maxWaitTimeMs": 5000,
    "maxConcurrency": 5,
    "type": "cloudflare",
    "credentials": {
      "accountId": "your-cloudflare-account-id",
      "apiKey": "your-cloudflare-api-key"
    }
  },
  "sources": [
    {
      "id": "source1",
      "name": "Web Analytics",
      "writeKey": "xfy4GpPqKyRG7qeHRWPhDz0H1epUJYSc"
    },
    {
      "id": "source2",
      "name": "CRM Data",
      "writeKey": "tCajXVnHVYFNxrbKSZ6ucXBHMnZG3aAK"
    }
  ],
  "destinations": [
    {
      "id": "dest0",
      "name": "S3 Delta",
      "type": "s3-delta",
      "config": {
        "maxRps": 100,
        "batchSize": 50,
        "url": "https://xxx.r2.cloudflarestorage.com",
        "bucket": "evefan",
        "_secret_credentials": {
          "accessKeyId": "yyy",
          "secretAccessKey": "zzz"
        }
      },
      "enabled": true,
      "created": "2023-06-01T00:00:00Z",
      "updated": "2023-06-01T00:00:00Z"
    },
    {
      "id": "dest1",
      "name": "Mixpanel Analytics",
      "type": "mixpanel",
      "config": {
        "maxRps": 100,
        "batchSize": 50,
        "_secret_credentials": {
          "token": "your-mixpanel-token"
        },
        "projectId": "your-mixpanel-project-id",
        "strict": 1,
        "identityMerge": "simplified",
        "region": "api"
      },
      "enabled": true,
      "created": "2023-06-01T00:00:00Z",
      "updated": "2023-06-01T00:00:00Z"
    },
    {
      "id": "dest2",
      "name": "PostgreSQL Database",
      "type": "postgres",
      "config": {
        "maxRps": 50,
        "batchSize": 100,
        "_secret_credentials": {
          "host": "your-postgres-host",
          "user": "your-postgres-user",
          "password": "your-postgres-password",
          "port": 5432,
          "database": "your-database-name"
        }
      },
      "enabled": true,
      "created": "2023-06-01T00:00:00Z",
      "updated": "2023-06-01T00:00:00Z"
    },
    {
      "id": "dest3",
      "name": "BigQuery Data Warehouse",
      "type": "bigquery",
      "config": {
        "maxRps": 200,
        "batchSize": 150,
        "location": "US",
        "_secret_credentials": {
          "type": "service_account",
          "project_id": "your-project-id",
          "private_key_id": "your-private-key-id",
          "private_key": "-----BEGIN PRIVATE KEY-----\nYOUR_PRIVATE_KEY_HERE\n-----END PRIVATE KEY-----\n",
          "client_email": "your-service-account-email@your-project.iam.gserviceaccount.com",
          "client_id": "your-client-id",
          "auth_uri": "https://accounts.google.com/o/oauth2/auth",
          "token_uri": "https://oauth2.googleapis.com/token",
          "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
          "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/your-service-account-email%40your-project.iam.gserviceaccount.com",
          "universe_domain": "googleapis.com"
        }
      },
      "enabled": true,
      "created": "2023-06-01T00:00:00Z",
      "updated": "2023-06-01T00:00:00Z"
    }
  ]
}