modulesettings.json
{
"Modules": {
"LlamaChat": {
"Name": "LlamaChat",
"Version": "1.7.1",
"PublishingInfo" : {
"Description": "A Large Language Model based on the Machine Learning Compilation for LLMs",
"IconURL": null,
"Category": "Generative AI",
"Stack": "Python, Llama",
"License": "MIT",
"LicenseUrl": "https://github.com/abetlen/llama-cpp-python/blob/main/LICENSE.md",
"Author": "Chris Maunder",
"Homepage": "https://codeproject.com/ai",
"BasedOn": "llama-cpp-python",
"BasedOnUrl": "https://github.com/abetlen/llama-cpp-python"
},
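      // How the server launches and manages this module's process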
"LaunchSettings": {
"AutoStart": false,
"FilePath": "llama_chat_adapter.py",
"Runtime": "python3.11",
"RuntimeLocation": "Local", // Can be Local, Shared or System
"PostStartPauseSecs": 0, // Generally 1 if using GPU, 0 for CPU
"Queue": null, // Use default
"Parallelism": 1 // 0 = Default = number of CPUs / 2
},
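      // Values placed in the module process's environment at launch; the
      // adapter reads these to locate and load the model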
"EnvironmentVariables": {
"CPAI_LOG_VERBOSITY": "Quiet",
"CPAI_MODULE_LLAMA_MODEL_DIR": "./models",
// ------- Mistral-7B-Instruct-v0.2-GGUF ---------------
// For loading model downloaded at install time
// "CPAI_MODULE_LLAMA_MODEL_FILENAME": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
        // Fallback: load at runtime (needs internet) via llama_cpp.Llama.from_pretrained
// "CPAI_MODULE_LLAMA_MODEL_REPO": "@TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
// "CPAI_MODULE_LLAMA_MODEL_FILEGLOB": "*.Q4_K_M.gguf"
// ------- Phi-3-mini-4k-instruct-gguf ---------------
// For loading model downloaded at install time
"CPAI_MODULE_LLAMA_MODEL_FILENAME": "Phi-3-mini-4k-instruct-q4.gguf",
        // For loading via llama_cpp.Llama.from_pretrained
"CPAI_MODULE_LLAMA_MODEL_REPO": "@microsoft/Phi-3-mini-4k-instruct-gguf",
"CPAI_MODULE_LLAMA_MODEL_FILEGLOB": "Phi-3-mini-4k-instruct-q4.gguf"
},
"GpuOptions" : {
"InstallGPU": true, // GPU support not provided
"EnableGPU": true, // Will be coerced to false if InstallGPU = false
"AcceleratorDeviceName": null, // = default
"HalfPrecision": "enable" // 'Force', 'Enable', 'Disable': whether to force on, allow, or disable half-precision ops
},
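      // Platform availability and server-version compatibility for each release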
"InstallOptions" : {
"Platforms": [ "all", "!windows-arm64", "!raspberrypi", "!orangepi", "!radxarock", "!jetson" ],
"ModuleReleases": [ // Which server version is compatible with each version of this module.
{ "ModuleVersion": "1.0", "ServerVersionRange": [ "2.3.1", "2.3.5" ], "ReleaseDate": "2022-11-01" },
{ "ModuleVersion": "1.1", "ServerVersionRange": [ "2.3.5", "2.4.0" ], "ReleaseDate": "2022-11-10", "ReleaseNotes": "Updated to match new installer SDK." },
{ "ModuleVersion": "1.1.1", "ServerVersionRange": [ "2.4.1", "2.4.1" ], "ReleaseDate": "2023-12-06", "ReleaseNotes": "Updated modulesettings schema", "Importance": "Minor" },
{ "ModuleVersion": "1.1.2", "ServerVersionRange": [ "2.4.2", "2.4.9" ], "ReleaseDate": "2023-12-09", "ReleaseNotes": "Installer updates", "Importance": "Minor" },
{ "ModuleVersion": "1.2.0", "ServerVersionRange": [ "2.5.0-RC1", "2.5.0-RC5" ], "ReleaseDate": "2024-01-06", "ReleaseNotes": "Additions for dynamic explorer UI" },
{ "ModuleVersion": "1.2.1", "ServerVersionRange": [ "2.5.0-RC1", "2.5.0-RC5" ], "ReleaseDate": "2024-01-13", "ReleaseNotes": "Changes to SDK" },
{ "ModuleVersion": "1.2.2", "ServerVersionRange": [ "2.5.0-RC6", "2.6.0" ], "ReleaseDate": "2024-01-16", "ReleaseNotes": "Updated modulesettings schema" },
{ "ModuleVersion": "1.2.3", "ServerVersionRange": [ "2.5.0-RC6", "2.6.0" ], "ReleaseDate": "2024-01-18", "ReleaseNotes": "Updated explorer" },
{ "ModuleVersion": "1.3.0", "ServerVersionRange": [ "2.5.0-RC6", "2.6.0" ], "ReleaseDate": "2024-01-21", "ReleaseNotes": "Module performance statistics added" },
{ "ModuleVersion": "1.3.1", "ServerVersionRange": [ "2.5.2", "2.6.0" ], "ReleaseDate": "2024-02-08", "ReleaseNotes": "Support for CodeProject.AI Server 2.5.2" },
{ "ModuleVersion": "1.4.0", "ServerVersionRange": [ "2.6.0", "2.6.0" ], "ReleaseDate": "2024-03-26", "ReleaseNotes": "First public release" },
{ "ModuleVersion": "1.4.1", "ServerVersionRange": [ "2.6.1", "2.6.0" ], "ReleaseDate": "2024-04-01", "ReleaseNotes": "Minor corrections" },
{ "ModuleVersion": "1.4.2", "ServerVersionRange": [ "2.6.1", "2.6.0" ], "ReleaseDate": "2024-04-19", "ReleaseNotes": "Corrected detection of GPU usage." },
{ "ModuleVersion": "1.4.3", "ServerVersionRange": [ "2.6.1", "2.6.0" ], "ReleaseDate": "2024-04-25", "ReleaseNotes": "Microsoft Phi-3 and macOS Support." },
{ "ModuleVersion": "1.4.4", "ServerVersionRange": [ "2.6.1", "2.6.0" ], "ReleaseDate": "2024-05-27", "ReleaseNotes": "Corrections to requirements files." },
{ "ModuleVersion": "1.5.0", "ServerVersionRange": [ "2.6.5", "2.7.0" ], "ReleaseDate": "2024-04-25", "ReleaseNotes": "Updated for CodeProject.AI Server 2.6.5" },
{ "ModuleVersion": "1.5.1", "ServerVersionRange": [ "2.6.5", "2.7.0" ], "ReleaseDate": "2024-06-21", "ReleaseNotes": "Switched to Microsoft Phi-3, Corrected Numpy 2.0 issue" },
{ "ModuleVersion": "1.5.2", "ServerVersionRange": [ "2.6.5", "2.7.0" ], "ReleaseDate": "2024-06-26", "ReleaseNotes": "Further work on the Numpy 2.0 issue" },
{ "ModuleVersion": "1.6.0", "ServerVersionRange": [ "2.6.5", "2.7.0" ], "ReleaseDate": "2024-07-02", "ReleaseNotes": "Added option to modify system prompt" },
{ "ModuleVersion": "1.7.0", "ServerVersionRange": [ "2.8.0", "" ], "ReleaseDate": "2024-08-02", "ReleaseNotes": "Updated for server 2.8" },
{ "ModuleVersion": "1.7.1", "ServerVersionRange": [ "2.8.0", "" ], "ReleaseDate": "2024-08-12", "ReleaseNotes": "Corrections to installer for Ubuntu" }
]
},
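      // The kind of model this module consumes: a GGUF-format text-generation model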
"ModelRequirements" : [{
"Task": "Text Generation",
"Architecture": "GGUF",
"Format": ""
}],
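      // The server API routes this module serves, and the command each maps to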
"RouteMaps": [
{
"Name": "LlamaChat",
"Route": "text/chat",
"Method": "POST",
"Command": "prompt",
"MeshEnabled": false,
"Description": "Uses the Llama LLM to answer simple wiki-based questions.",
"Inputs": [
{
"Name": "prompt",
"Type": "Text",
"Description": "The prompt to generate text from"
},
{
"Name": "system_prompt",
"Type": "Text",
"Description": "The description of the assistant",
"Default": "You're a helpful assistant who answers questions the user asks of you concisely and accurately."
},
{
"Name": "max_tokens",
"Type": "Integer",
"Description": "The maximum number of tokens to generate",
"Default": "0 (model default)"
},
{
"Name": "temperature",
"Type": "Float",
"Description": "The temperature to use for sampling",
"Default": 0.4
}
],
"Outputs": [
{
"Name": "success",
"Type": "Boolean",
"Description": "True if successful."
},
{
"Name": "reply",
"Type": "Text",
"Description": "The reply from the model."
},
{
"Name": "inferenceMs",
"Type": "Integer",
"Description": "The time (ms) to perform the AI inference."
},
{
"Name": "processMs",
"Type": "Integer",
"Description": "The time (ms) to process the image (includes inference and image manipulation operations)."
}
]
}
]
}
}
}
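For reference, here is a minimal Python sketch of calling the text/chat route defined above. It is not part of the module: it assumes a CodeProject.AI Server running at its default address (localhost:32168) with the standard /v1 API prefix, and that the route's declared Inputs are accepted as form fields; adjust the address and prefix if your install differs.

# A minimal sketch (not part of the module) of calling the "text/chat"
# route defined in the RouteMaps above. Assumes a CodeProject.AI Server
# on the default port 32168 with the /v1 route prefix.
import requests

SERVER = "http://localhost:32168/v1"   # assumed default server address

def chat(prompt: str, max_tokens: int = 0, temperature: float = 0.4) -> str:
    """POST the route's declared inputs as form fields; return the reply."""
    resp = requests.post(
        f"{SERVER}/text/chat",
        data={
            "prompt": prompt,
            "max_tokens": max_tokens,     # 0 = model default
            "temperature": temperature,
        },
        timeout=300,                      # LLM inference can be slow
    )
    resp.raise_for_status()
    result = resp.json()
    if not result.get("success"):
        raise RuntimeError("Inference failed")
    # inferenceMs / processMs are the timing outputs declared above
    print(f"inference: {result['inferenceMs']} ms, process: {result['processMs']} ms")
    return result["reply"]

if __name__ == "__main__":
    print(chat("In one sentence, what is a GGUF file?"))

The system_prompt input can be posted the same way to override the default assistant description declared in the route.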