Skip to content

Commit

Permalink
src: remove unused code and imports
Browse files Browse the repository at this point in the history
Signed-off-by: Daniel Bevenius <daniel.bevenius@gmail.com>
  • Loading branch information
danbev committed Dec 4, 2023
1 parent ebe5e75 commit 4164250
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 32 deletions.
31 changes: 0 additions & 31 deletions crates/llm-chain-llama/src/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,34 +303,3 @@ impl Tokenizer for LLamaTokenizer<'_> {
Ok(output.to_string())
}
}

/// Lossily decodes `bytes` as UTF-8, holding back a trailing incomplete sequence.
///
/// Returns `(decoded, leftover)`:
///
/// * `decoded` contains every well-formed part of the input. Each byte that
///   belongs to a malformed sequence is replaced by one
///   `U+FFFD REPLACEMENT CHARACTER` (so a malformed sequence of `len` bytes
///   yields `len` replacement characters).
/// * `leftover` contains the bytes from the point where the input ended in the
///   middle of a sequence (`Utf8Error::error_len()` returned `None`). It is
///   empty when the input did not end that way.
///
/// The input is processed iteratively in a single pass, so the stack depth and
/// the amount of copying do not grow with the number of malformed sequences.
fn decode_up_to_valid_utf8(bytes: &[u8]) -> (String, Vec<u8>) {
    let mut out = String::with_capacity(bytes.len());
    let mut rest = bytes;

    loop {
        let err = match std::str::from_utf8(rest) {
            Ok(valid) => {
                out.push_str(valid);
                return (out, Vec::new());
            }
            Err(err) => err,
        };

        let (valid, tail) = rest.split_at(err.valid_up_to());
        // `valid_up_to` is the length of the longest well-formed prefix, so
        // decoding that prefix again cannot fail.
        out.push_str(
            std::str::from_utf8(valid).expect("prefix was already validated as UTF-8"),
        );

        match err.error_len() {
            // The input ended mid-sequence: return the partial bytes as leftover
            // instead of replacing them.
            None => return (out, tail.to_vec()),
            // A malformed sequence of `len` bytes: emit one replacement
            // character per byte and continue after it.
            Some(len) => {
                out.extend(std::iter::repeat(std::char::REPLACEMENT_CHARACTER).take(len));
                rest = &tail[len..];
            }
        }
    }
}
2 changes: 1 addition & 1 deletion crates/llm-chain-llama/src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use llm_chain::prompt::Data;
use std::ffi::{CStr, CString};
use std::os::raw::c_char;

use llm_chain_llama_sys::{llama_token, llama_token_get_text, llama_token_nl, llama_tokenize};
use llm_chain_llama_sys::{llama_token, llama_token_get_text, llama_tokenize};

use crate::context::LLamaContext;

Expand Down

0 comments on commit 4164250

Please sign in to comment.