SkyAPM · wu-sheng · Aug 6, 2024 · Aug 6, 2024
diff --git a/models/Configuration.md b/models/Configuration.md
@@ -7,40 +7,46 @@ All configurations in URI Drain are done using `uri_drain.ini` file. [Here is a
 Snapshot is used to serialize and store the analysis results that have been saved in the current system. 
 Currently, it supports saving snapshots to the file system.
 
-| Name                      | Type(Unit)  | Default | Description                                                                               |
-|---------------------------|-------------|---------|-------------------------------------------------------------------------------------------|
-| file_dir                  | string      | /tmp/   | The directory to save the snapshot, the persistent would disable when the value is empty. |
-| snapshot_interval_minutes | int(minute) | 10      | The interval to save the snapshot.                                                        |
-| compress_state            | bool        | True    | Whether to compress the snapshot through zlib with base64.                                |
+| Name                      | Type(Unit)  | Environment Key           | Default | Description                                                                               |
+|---------------------------|-------------|---------------------------|---------|-------------------------------------------------------------------------------------------|
+| file_path                 | string      | SNAPSHOT_FILE_PATH        | /tmp/   | The directory to save the snapshot; persistence is disabled when the value is empty.      |
+| snapshot_interval_minutes | int(minute) | SNAPSHOT_INTERVAL_MINUTES | 10      | The interval to save the snapshot.                                                        |
+| compress_state            | bool        | SNAPSHOT_COMPRESS_STATE   | True    | Whether to compress the snapshot through zlib with base64.                                |
 
 ### Masking
 
 When aggregation methods are detected, Masking determines how to generate the aggregation information.
 
 Currently, all similar content is replaced with `{var}` by default.
 
-| Name        | Type(Unit) | Default | Description                       |
-|-------------|------------|---------|-----------------------------------|
-| mask_prefix | string     | {       | The prefix to mask the parameter. |
-| mask_suffix | string     | }       | The suffix to mask the parameter. |
+| Name        | Type(Unit) | Environment Key | Default | Description                       |
+|-------------|------------|-----------------|---------|-----------------------------------|
+| mask_prefix | string     | MASKING_PREFIX  | {       | The prefix to mask the parameter. |
+| mask_suffix | string     | MASKING_SUFFIX  | }       | The suffix to mask the parameter. |
 
 ### Drain
 
 Drain is the core algorithm of URI Drain. 
 
-| Name             | Type(Unit) | Default | Description                                                                                                                                                          |
-|------------------|------------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| sim_th           | float      | 0.4     | The similarity threshold to decide if a new sequence should be merged into an existing cluster.                                                                      |
-| depth            | int        | 4       | Max depth levels of pattern. Minimum is 2.                                                                                                                           |
-| max_children     | int        | 100     | Max number of children of an internal node.                                                                                                                          |
-| max_clusters     | int        | 1024    | Max number of tracked clusters (unlimited by default). When this number is reached, model starts replacing old clusters with a new ones according to the LRU policy. |
-| extra_delimiters | string     | /       | The extra delimiters to split the sequence.                                                                                                                          |
+| Name             | Type(Unit) | Environment Key        | Default | Description                                                                                                                                                          |
+|------------------|------------|------------------------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| sim_th           | float      | DRAIN_SIM_TH           | 0.4     | The similarity threshold to decide if a new sequence should be merged into an existing cluster.                                                                      |
+| depth            | int        | DRAIN_DEPTH            | 4       | Max depth levels of pattern. Minimum is 2.                                                                                                                           |
+| max_children     | int        | DRAIN_MAX_CHILDREN     | 100     | Max number of children of an internal node.                                                                                                                          |
+| max_clusters     | int        | DRAIN_MAX_CLUSTERS     | 1024    | Max number of tracked clusters (unlimited by default). When this number is reached, the model replaces old clusters with new ones according to the LRU policy.        |
+| extra_delimiters | string     | DRAIN_EXTRA_DELIMITERS | \["/"\] | The extra delimiters to split the sequence.                                                                                                                          |
 
 ### Profiling
 
 Profiling is used to enable the profiling of the algorithm.
 
-| Name       | Type(Unit)  | Default | Description                                       |
-|------------|-------------|---------|---------------------------------------------------|
-| enabled    | bool        | False   | Whether to enable the profiling.                  |
-| report_sec | int(second) | 30      | The interval to report the profiling information. |
+| Name       | Type(Unit)  | Environment Key      | Default | Description                                       |
+|------------|-------------|----------------------|---------|---------------------------------------------------|
+| enabled    | bool        | PROFILING_ENABLED    | False   | Whether to enable the profiling.                  |
+| report_sec | int(second) | PROFILING_REPORT_SEC | 30      | The interval to report the profiling information. |
+
+## Configuration with Environment Variables
+
+In the configuration file, most of the values are written in the format `${xxx:config_value}`.
+It means that when the program starts, it first reads `xxx` from the **system environment variables** at runtime.
+If the variable cannot be found, `config_value` is used as the value.
diff --git a/models/uri_drain/template_miner_config.py b/models/uri_drain/template_miner_config.py
@@ -4,11 +4,14 @@
 import configparser
 import json
 import logging
+import os
+import re
 
 from models.uri_drain.masking import MaskingInstruction
 
 logger = logging.getLogger(__name__)
 
+env_regular_regex = re.compile(r'\${(?P<ENV>[_A-Z0-9]+):(?P<DEF>.*)}')  # `${ENV_NAME:default}` placeholder: ENV = variable name, DEF = fallback text
 
 class TemplateMinerConfig:
     def __init__(self):
@@ -40,46 +43,63 @@ def load(self, config_filename: str):
         section_drain = 'DRAIN'
         section_masking = 'MASKING'
 
-        self.engine = parser.get(section_drain, 'engine', fallback=self.engine)
+        self.engine = self.read_config_value(parser, section_drain, 'engine', str, self.engine)
 
-        self.profiling_enabled = parser.getboolean(section_profiling, 'enabled',
-                                                   fallback=self.profiling_enabled)
-        self.profiling_report_sec = parser.getint(section_profiling, 'report_sec',
-                                                  fallback=self.profiling_report_sec)
+        self.profiling_enabled = self.read_config_value(parser, section_profiling, 'enabled', bool, self.profiling_enabled)
+        self.profiling_report_sec = self.read_config_value(parser, section_profiling, 'report_sec', int, self.profiling_report_sec)
 
-        self.snapshot_interval_minutes = parser.getint(section_snapshot, 'snapshot_interval_minutes',
-                                                       fallback=self.snapshot_interval_minutes)
-        self.snapshot_compress_state = parser.getboolean(section_snapshot, 'compress_state',
-                                                         fallback=self.snapshot_compress_state)
-        file_path = parser.get(section_snapshot, 'file_path', fallback=None)
+        self.snapshot_interval_minutes = self.read_config_value(parser, section_snapshot, 'snapshot_interval_minutes',
+                                                                int, self.snapshot_interval_minutes)
+        self.snapshot_compress_state = self.read_config_value(parser, section_snapshot, 'compress_state', bool,
+                                                              self.snapshot_compress_state)
+        file_path = self.read_config_value(parser, section_snapshot, 'file_path', str, None)
         if file_path:
             self.snapshot_file_dir = file_path
 
-        drain_extra_delimiters_str = parser.get(section_drain, 'extra_delimiters',
-                                                fallback=str(self.drain_extra_delimiters))
+        drain_extra_delimiters_str = self.read_config_value(parser, section_drain, 'extra_delimiters', str,
+                                                            str(self.drain_extra_delimiters))
         self.drain_extra_delimiters = ast.literal_eval(drain_extra_delimiters_str)
 
-        self.drain_sim_th = parser.getfloat(section_drain, 'sim_th',
-                                            fallback=self.drain_sim_th)
-        self.drain_depth = parser.getint(section_drain, 'depth',
-                                         fallback=self.drain_depth)
-        self.drain_max_children = parser.getint(section_drain, 'max_children',
-                                                fallback=self.drain_max_children)
-        self.drain_max_clusters = parser.getint(section_drain, 'max_clusters',
-                                                fallback=self.drain_max_clusters)
-        self.parametrize_numeric_tokens = parser.getboolean(section_drain, 'parametrize_numeric_tokens',
-                                                            fallback=self.parametrize_numeric_tokens)
-
-        masking_instructions_str = parser.get(section_masking, 'masking',
-                                              fallback=str(self.masking_instructions))
-        self.mask_prefix = parser.get(section_masking, 'mask_prefix', fallback=self.mask_prefix)
-        self.mask_suffix = parser.get(section_masking, 'mask_suffix', fallback=self.mask_suffix)
-        self.parameter_extraction_cache_capacity = parser.get(section_masking, 'parameter_extraction_cache_capacity',
-                                                              fallback=self.parameter_extraction_cache_capacity)
+        self.drain_sim_th = self.read_config_value(parser, section_drain, 'sim_th', float, self.drain_sim_th)
+        self.drain_depth = self.read_config_value(parser, section_drain, 'depth', int, self.drain_depth)
+        self.drain_max_children = self.read_config_value(parser, section_drain, 'max_children', int,
+                                                         self.drain_max_children)
+        self.drain_max_clusters = self.read_config_value(parser, section_drain, 'max_clusters', int,
+                                                         self.drain_max_clusters)
+        self.parametrize_numeric_tokens = self.read_config_value(parser, section_drain, 'parametrize_numeric_tokens',
+                                                                 bool, self.parametrize_numeric_tokens)
+
+        masking_instructions_str = self.read_config_value(parser, section_masking, 'masking', str,
+                                                          str(self.masking_instructions))
+        self.mask_prefix = self.read_config_value(parser, section_masking, 'mask_prefix', str, self.mask_prefix)
+        self.mask_suffix = self.read_config_value(parser, section_masking, 'mask_suffix', str, self.mask_suffix)
+        self.parameter_extraction_cache_capacity = self.read_config_value(parser, section_masking,
+                                                                          'parameter_extraction_cache_capacity', int,
+                                                                          self.parameter_extraction_cache_capacity)
 
         masking_instructions = []
         masking_list = json.loads(masking_instructions_str)
         for mi in masking_list:
             instruction = MaskingInstruction(mi['regex_pattern'], mi['mask_with'])
             masking_instructions.append(instruction)
         self.masking_instructions = masking_instructions
+
+    def read_value_with_env(self, value: str):
+        """Return ``value`` with a leading ``${ENV:default}`` placeholder resolved from the environment."""
+        placeholder = env_regular_regex.match(value)
+        if placeholder is None:
+            # Plain setting without a placeholder: hand it back unchanged.
+            return value
+        env_name, fallback = placeholder.group('ENV', 'DEF')
+        return os.getenv(env_name, fallback)
+
+    def read_config_value(self, parser, section, key, tp, default):
+        """Return option ``key`` of ``section`` converted with ``tp``; ``default`` (unconverted) if unset."""
+        raw = parser.get(section, key, fallback=None)
+        if raw is None:
+            return default
+        resolved = self.read_value_with_env(raw)
+        # Booleans are looked up in the parser's BOOLEAN_STATES table; unknown words raise ValueError.
+        if tp == bool and resolved.lower() not in parser.BOOLEAN_STATES:
+            raise ValueError('Not a boolean: %s' % resolved)
+        return parser.BOOLEAN_STATES[resolved.lower()] if tp == bool else tp(resolved)
diff --git a/servers/simple/uri_drain.ini b/servers/simple/uri_drain.ini
@@ -13,28 +13,28 @@
 #  limitations under the License.
 
 [SNAPSHOT]
-file_path = /tmp/
-snapshot_interval_minutes = 10
-compress_state = True
+file_path = ${SNAPSHOT_FILE_PATH:/tmp/}
+snapshot_interval_minutes = ${SNAPSHOT_INTERVAL_MINUTES:10}
+compress_state = ${SNAPSHOT_COMPRESS_STATE:True}
 
 [MASKING]
 ;masking = [
 ;          {"regex_pattern":"\\d+", "mask_with": "INT"}
 ;          ]
-mask_prefix = {
-mask_suffix = }
+mask_prefix = ${MASKING_PREFIX:{}
+mask_suffix = ${MASKING_SUFFIX:}}
 
 [DRAIN]
 # engine is Optional parameter. Engine will be "Drain" if the engine argument is not specified.
 # engine has two options: 'Drain' and 'JaccardDrain'.
-# engine = Drain
-sim_th = 0.4
+engine = ${DRAIN_ENGINE:Drain}
+sim_th = ${DRAIN_SIM_TH:0.4}
 ; TODO: Evaluate: sim_th is dynamically calculated.
-depth = 4
-max_children = 100
-max_clusters = 1024
-extra_delimiters = ["/"]
+depth = ${DRAIN_DEPTH:4}
+max_children = ${DRAIN_MAX_CHILDREN:100}
+max_clusters = ${DRAIN_MAX_CLUSTERS:1024}
+extra_delimiters = ${DRAIN_EXTRA_DELIMITERS:["/"]}
 
 [PROFILING]
-enabled = False
-report_sec = 30
+enabled = ${PROFILING_ENABLED:False}
+report_sec = ${PROFILING_REPORT_SEC:30}