From bee4a8cf2607146ec50123401c495c5cf8661a53 Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Sun, 5 Feb 2023 02:35:01 -0600
Subject: [PATCH] Sync changes to nginx coding to PR

---
 docker/configure_workers_and_start.py | 41 ++++++++++++++++-----------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py
index 4421dce8a..ce7f46e31 100755
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@@ -293,7 +293,7 @@
 """
 
 NGINX_UPSTREAM_CONFIG_BLOCK = """
-upstream {upstream_worker_type} {{
+upstream {upstream_worker_base_name} {{
 {body}
 }}
 """
@@ -706,10 +706,15 @@ def generate_worker_files(
     # more than a single worker for cases where multiples would be bad(e.g. presence).
     worker_type_shard_counter: Dict[str, int] = {}
 
-    # Similar to above, but for worker name's. This is used to check that worker names
-    # for different worker types or combinations of types is not used, as it will error
-    # with 'Address in use'(e.g. "to_device, to_device=typing" would error.
-    worker_name_type_counter: Dict[str, str] = {}
+    # Similar to above, but for worker name's. This has two uses:
+    # 1. Can be used to check that worker names for different worker types or
+    #       combinations of types is not used, as it will error with 'Address in
+    #       use'(e.g. "to_device, to_device=typing" would not work).
+    # 2. Convenient way to get the combination of worker types from worker_name after
+    #       processing and merging.
+    # Follows the pattern:
+    # ["worker_name": "worker_type(s)"]
+    worker_name_type_list: Dict[str, str] = {}
 
     # A list of internal endpoints to healthcheck, starting with the main process
     # which exists even if no workers do.
@@ -866,11 +871,11 @@ def generate_worker_files(
         # different worker_type. If it's not allowed, will error and stop. If no issues,
         # it will be added to the counter.
         if is_name_allowed_for_worker(
-            worker_name_type_counter,
+            worker_name_type_list,
             worker_base_name,
             worker_type,
         ):
-            worker_name_type_counter.setdefault(worker_base_name, worker_type)
+            worker_name_type_list.setdefault(worker_base_name, worker_type)
 
         else:
             error(
@@ -880,7 +885,7 @@ def generate_worker_files(
                 + worker_type
                 + ". It is already in use by "
                 # This is cast as a str because mypy thinks it could be None
-                + str(worker_name_type_counter.get(worker_base_name))
+                + str(worker_name_type_list.get(worker_base_name))
             )
 
         # Replace placeholder names in the config template with the actual worker name.
@@ -925,7 +930,7 @@ def generate_worker_files(
                 # Create or add to a load-balanced upstream for this worker
                 nginx_upstreams.setdefault(worker_base_name, set()).add(worker_port)
 
-                # Upstreams are named after the worker_type
+                # Upstreams are named after the worker_base_name
                 upstream = "http://" + worker_base_name
             else:
                 upstream = "http://localhost:%d" % (worker_port,)
@@ -962,11 +967,11 @@ def generate_worker_files(
     worker_type_load_balance_header_list = ["synchrotron"]
     worker_type_load_balance_ip_list = ["federation_inbound"]
 
-    for upstream_worker_type, upstream_worker_ports in nginx_upstreams.items():
+    for upstream_worker_base_name, upstream_worker_ports in nginx_upstreams.items():
         body = ""
         for port in upstream_worker_ports:
             body += "    server localhost:%d;\n" % (port,)
-        log("upstream_worker_type: " + upstream_worker_type)
+        log("upstream_worker_base_name: " + upstream_worker_base_name)
 
         # This presents a dilemma. Some endpoints are better load-balanced by
         # Authorization header, and some by remote IP. What do you do if a combo
@@ -995,11 +1000,13 @@ def generate_worker_files(
         # cache data. This works well for federation.
         if any(
             x in worker_type_load_balance_ip_list
-            for x in upstream_worker_type.split("+")
+            for x in str(worker_name_type_list.get(upstream_worker_base_name)).split(
+                "+"
+            )
         ):
             nginx_upstream_config += (
                 NGINX_UPSTREAM_HASH_BY_CLIENT_IP_CONFIG_BLOCK.format(
-                    upstream_worker_type=upstream_worker_type,
+                    upstream_worker_base_name=upstream_worker_base_name,
                     body=body,
                 )
             )
@@ -1008,16 +1015,18 @@ def generate_worker_files(
         # upstream source, like a synchrotron worker, with smarter caching of data.
         elif any(
             x in worker_type_load_balance_header_list
-            for x in upstream_worker_type.split("+")
+            for x in str(worker_name_type_list.get(upstream_worker_base_name)).split(
+                "+"
+            )
         ):
             nginx_upstream_config += NGINX_UPSTREAM_HASH_BY_AUTH_HEADER_BLOCK.format(
-                upstream_worker_type=upstream_worker_type,
+                upstream_worker_base_name=upstream_worker_base_name,
                 body=body,
             )
         # Everything else, just use the default basic round-robin scheme.
         else:
             nginx_upstream_config += NGINX_UPSTREAM_CONFIG_BLOCK.format(
-                upstream_worker_type=upstream_worker_type,
+                upstream_worker_base_name=upstream_worker_base_name,
                 body=body,
             )