
Update default settings for qianfan and tongyi
guoyao committed Aug 13, 2024
1 parent 64c6a8d commit 9b013b2
Showing 2 changed files with 14 additions and 14 deletions.
20 changes: 10 additions & 10 deletions example_settings/qianfan/settings.yaml
@@ -5,32 +5,32 @@ llm:
api_key: ${GRAPHRAG_API_KEY}
type: openai_chat # or azure_openai_chat
model: qianfan.ERNIE-4.0-Turbo-8K
-model_supports_json: false # recommended if this is available for your model.
+model_supports_json: false # recommended if this is available for your model, original default is true
# max_tokens: 4000
# request_timeout: 180.0
# api_base: https://<instance>.openai.azure.com
# api_version: 2024-02-15-preview
# organization: <organization_id>
# deployment_name: <azure_model_deployment_name>
# tokens_per_minute: 150_000 # set a leaky bucket throttle
-# requests_per_minute: 10_000 # set a leaky bucket throttle
+requests_per_minute: 120 # set a leaky bucket throttle, original default is 10_000
# max_retries: 10
# max_retry_wait: 10.0
# sleep_on_rate_limit_recommendation: true # whether to sleep when azure suggests wait-times
-# concurrent_requests: 25 # the number of parallel inflight requests that may be made
-temperature: 0.1 # temperature for sampling
+concurrent_requests: 2 # the number of parallel inflight requests that may be made, original default is 25
+temperature: 0.1 # temperature for sampling, original default is 0
# top_p: 1 # top-p sampling
# n: 1 # Number of completions to generate

parallelization:
stagger: 0.3
# num_threads: 50 # the number of threads to use for parallel processing

-async_mode: asyncio # or asyncio
+async_mode: asyncio # or threaded

embeddings:
## parallelization: override the global parallelization settings for embeddings
-async_mode: asyncio # or asyncio
+async_mode: asyncio # or threaded
llm:
api_key: ${GRAPHRAG_API_KEY}
type: openai_embedding # or azure_openai_embedding
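
The `requests_per_minute` and `concurrent_requests` comments above describe a leaky-bucket throttle: requests are admitted at a steady drip rather than in bursts, which is why this commit drops qianfan from the default 10_000 requests per minute to 120. Below is a minimal sketch of the idea in Python; the `LeakyBucketThrottle` class and `call_llm` function are hypothetical illustrations, not GraphRAG's actual limiter.

```python
import threading
import time

class LeakyBucketThrottle:
    """Minimal leaky-bucket sketch: admit at most `rate` requests per
    `period` seconds by spacing admissions evenly (hypothetical helper)."""

    def __init__(self, rate: int, period: float = 60.0) -> None:
        self.interval = period / rate          # seconds between admitted requests
        self.next_slot = time.monotonic()      # when the next request may proceed
        self.lock = threading.Lock()

    def acquire(self) -> None:
        """Block until the next request slot opens, then claim it."""
        with self.lock:
            now = time.monotonic()
            wait = self.next_slot - now
            self.next_slot = max(now, self.next_slot) + self.interval
        if wait > 0:
            time.sleep(wait)

# requests_per_minute: 120 in the settings above maps to rate=120, period=60
throttle = LeakyBucketThrottle(rate=120, period=60.0)

def call_llm(prompt: str) -> None:
    throttle.acquire()   # no more than ~120 admissions per minute get through
    ...                  # issue the actual API request here
```

Lowering `concurrent_requests` to 2 additionally bounds how many throttled calls are in flight at once, presumably to stay within qianfan's stricter per-account limits.
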
@@ -138,16 +138,16 @@ local_search:
# conversation_history_max_turns: 5
# top_k_mapped_entities: 10
# top_k_relationships: 10
-llm_temperature: 0.1 # temperature for sampling
+llm_temperature: 0.1 # temperature for sampling, original default is 0
# llm_top_p: 1 # top-p sampling
# llm_n: 1 # Number of completions to generate
-max_tokens: 5120
+max_tokens: 5000 # original default is 12000

global_search:
-llm_temperature: 0.1 # temperature for sampling
+llm_temperature: 0.1 # temperature for sampling, original default is 0
# llm_top_p: 1 # top-p sampling
# llm_n: 1 # Number of completions to generate
-max_tokens: 5120
+max_tokens: 5000 # original default is 12000
# data_max_tokens: 12000
# map_max_tokens: 1000
# reduce_max_tokens: 2000
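
The qianfan file also sets the sampling temperature to 0.1 rather than the library default of 0. Temperature divides the model's logits before the softmax, so small positive values keep output near-deterministic while allowing slight variation. A short illustrative sketch (the `sample_with_temperature` helper is hypothetical, not GraphRAG code):

```python
import math
import random

def sample_with_temperature(logits: list[float], temperature: float) -> int:
    """Sample an index with probability proportional to exp(logit / T).
    T < 1 sharpens the distribution; T -> 0 approaches argmax."""
    scaled = [x / temperature for x in logits]
    m = max(scaled)                            # subtract the max for numerical stability
    weights = [math.exp(s - m) for s in scaled]
    return random.choices(range(len(logits)), weights=weights, k=1)[0]

# At temperature 0.1 the top logit dominates almost completely:
logits = [2.0, 1.0, 0.5]
print(sample_with_temperature(logits, temperature=0.1))  # almost always 0
```
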
8 changes: 4 additions & 4 deletions example_settings/tongyi/settings.yaml
@@ -5,7 +5,7 @@ llm:
api_key: ${GRAPHRAG_API_KEY}
type: openai_chat # or azure_openai_chat
model: tongyi.qwen-plus
-model_supports_json: false # recommended if this is available for your model.
+model_supports_json: false # recommended if this is available for your model, original default is true
# max_tokens: 4000
# request_timeout: 180.0
# api_base: https://<instance>.openai.azure.com
@@ -19,7 +19,7 @@ llm:
# sleep_on_rate_limit_recommendation: true # whether to sleep when azure suggests wait-times
# concurrent_requests: 25 # the number of parallel inflight requests that may be made
# temperature: 0 # temperature for sampling
-top_p: 0.9 # top-p sampling
+top_p: 0.9 # top-p sampling, original default is 1
# n: 1 # Number of completions to generate

parallelization:
@@ -139,13 +139,13 @@ local_search:
# top_k_mapped_entities: 10
# top_k_relationships: 10
# llm_temperature: 0 # temperature for sampling
-llm_top_p: 0.9 # top-p sampling
+llm_top_p: 0.9 # top-p sampling, original default is 1
# llm_n: 1 # Number of completions to generate
# max_tokens: 12000

global_search:
# llm_temperature: 0 # temperature for sampling
-llm_top_p: 0.9 # top-p sampling
+llm_top_p: 0.9 # top-p sampling, original default is 1
# llm_n: 1 # Number of completions to generate
# max_tokens: 12000
# data_max_tokens: 12000
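
The tongyi file leaves `temperature` at its default and instead sets `top_p: 0.9`, down from the default of 1. Top-p (nucleus) sampling keeps the smallest set of highest-probability tokens whose cumulative probability reaches `top_p`, then renormalizes over that set, trimming the low-probability tail. A minimal sketch (the `top_p_filter` helper is hypothetical):

```python
def top_p_filter(probs: list[float], top_p: float = 0.9) -> list[float]:
    """Keep the smallest set of highest-probability tokens whose cumulative
    probability reaches top_p, then renormalize (hypothetical helper)."""
    order = sorted(range(len(probs)), key=lambda i: probs[i], reverse=True)
    kept, cumulative = [], 0.0
    for i in order:
        kept.append(i)
        cumulative += probs[i]
        if cumulative >= top_p:
            break
    total = sum(probs[i] for i in kept)
    filtered = [0.0] * len(probs)
    for i in kept:
        filtered[i] = probs[i] / total
    return filtered

# With top_p=0.9, the 0.05-probability tail tokens are dropped:
print(top_p_filter([0.6, 0.3, 0.05, 0.05]))  # [0.666..., 0.333..., 0.0, 0.0]
```
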
