Add the DRY dynamic N-gram anti-repetition sampler

The DRY (Do not Repeat Yourself) sampler is a dynamic N-gram repetition penalty that negatively scores tokens that would extend sequences that already appear in the context. See this discussion for the motivation and an explanation of the sampler: oobabooga/text-generation-webui#5677. This implementation of DRY mostly aligns with the oobabooga version, with a few modifications. It uses a more efficient linear scanning algorithm to identify repetitions. It also supports multi-token sequence breakers. As a limitation, this implementation reuses the repetition penalty range ("rep pen range") parameter rather than introducing a new range just for the DRY sampler. There is a separate change to lite.koboldai.net that exposes the DRY sampler parameters to KoboldAI Lite, so none of the embed files have been changed as part of this commit.
Nexesenex · Jul 8, 2024 · e841782 · e841782
1 parent cd1c794
commit e841782
Show file tree

Hide file tree

Showing 5 changed files with 336 additions and 6 deletions.
diff --git a/common/common.h b/common/common.h
@@ -113,6 +113,10 @@ struct gpt_params {
     int32_t mirostat          = 0;     // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
     float   mirostat_tau      = 5.00f; // target entropy
     float   mirostat_eta      = 0.10f; // learning rate
+    float   dry_multiplier    = 0.0f;  // penalty multiplier, 0.0 = disabled
+    float   dry_base          = 1.75f; // exponential base
+    int32_t dry_allowed_length = 2;    // repeated sequences longer than this are penalized
+    std::vector<std::string> dry_restart_sequences; // DRY sequence breakers
 
     // DynaTemp!
     float   dynatemp_range     = 0.0f;  // enables DynaTemp if greater than 0. dynatemp_min = temperature - dt_range, dynatemp_max = temperature + dt_range

diff --git a/common/sampling.h b/common/sampling.h
@@ -53,6 +53,9 @@ typedef struct llama_sampling_params {
     int32_t     mirostat              = 0;                  // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
     float       mirostat_tau          = 5.00f;              // target entropy
     float       mirostat_eta          = 0.10f;              // learning rate
+    float       dry_multiplier        = 0.0f;               // DRY penalty scale, 0.0 = disabled
+    float       dry_base              = 1.75f;              // DRY exponent base, 0.0 = disabled
+    int32_t     dry_allowed_length    = 2;                  // DRY penalizes repeated sequences longer than this
     bool        penalize_nl           = false;              // consider newlines as a repeatable token
     uint32_t    seed                  = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampling_context
 

diff --git a/expose.h b/expose.h
@@ -5,6 +5,7 @@ const int stop_token_max = 16;
 const int ban_token_max = 16;
 const int tensor_split_max = 16;
 const int logit_bias_max = 16;
+const int dry_seq_break_max = 16;
 const int images_max = 4;
 
 // match kobold's sampler list and order
@@ -17,6 +18,7 @@ enum samplers
     KCPP_SAMPLER_TYP=4,
     KCPP_SAMPLER_TEMP=5,
     KCPP_SAMPLER_REP_PEN=6,
+    KCPP_SAMPLER_DRY=7,
     KCPP_SAMPLER_MAX
 };
 enum stop_reason
@@ -89,6 +91,10 @@ struct generation_inputs
     const int mirostat = 0;
     const float mirostat_eta = 0.0f;
     const float mirostat_tau = 0.0f;
+    const float dry_multiplier = 0.0f;
+    const float dry_base = 0.0f;
+    const int dry_allowed_length = 0.0f;
+    const char * dry_sequence_breakers[dry_seq_break_max] = {};
     const samplers sampler_order[KCPP_SAMPLER_MAX] = {};
     const int sampler_len = 0;
     const bool allow_eos_token = false;