Skip to content

Commit

Permalink
small updates
Browse files Browse the repository at this point in the history
  • Loading branch information
ArthurZucker committed Jun 6, 2024
1 parent f9740c8 commit f43cefc
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
10 changes: 6 additions & 4 deletions tokenizers/src/processors/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
//!
use crate::{Encoding, PostProcessor, Result};
use derive_more::Display;
use display_derive::StructDisplay;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
Expand Down Expand Up @@ -249,8 +250,9 @@ impl SpecialToken {
///
/// [`Piece`]: enum.Piece.html
///
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Eq)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Eq, Display)]
#[serde(transparent)]
#[display(fmt="Template([{:?}])",self)]
pub struct Template(Vec<Piece>);

impl<T> TryFrom<Vec<T>> for Template
Expand Down Expand Up @@ -289,8 +291,9 @@ impl TryFrom<&str> for Template {
/// from a HashMap or a Vec<[`SpecialToken`]>.
///
/// [`SpecialToken`]: struct.SpecialToken.html
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize, Eq)]
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize, Eq, Display)]
#[serde(transparent)]
#[display(fmt="{:?}", self)]
pub struct Tokens(
#[serde(serialize_with = "crate::utils::ordered_map")] pub HashMap<String, SpecialToken>,
);
Expand Down Expand Up @@ -332,9 +335,8 @@ impl From<HashMap<String, SpecialToken>> for Tokens {
/// .unwrap();
/// ```
///
#[derive(Debug, Clone, PartialEq, Builder, Serialize, Deserialize, Eq, Display)]
#[derive(Debug, Clone, PartialEq, Builder, Serialize, Deserialize, Eq, StructDisplay)]
#[serde(tag = "type", from = "TemplateProcessingDeserializer")]
#[display(fmt = "TemplateProcessing({:?})", self)]
#[builder(build_fn(validate = "Self::validate"))]
pub struct TemplateProcessing {
#[builder(try_setter, default = "\"$0\".try_into().unwrap()")]
Expand Down
5 changes: 3 additions & 2 deletions tokenizers/src/tokenizer/added_vocabulary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use super::{
};
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
use derive_more::Display;
use display_derive::StructDisplay;
use regex::Regex;
use serde::{ser::SerializeSeq, Deserialize, Serialize, Serializer};
use std::collections::{HashMap, HashSet};
Expand All @@ -12,7 +13,7 @@ use std::collections::{HashMap, HashSet};
/// like:
/// - Whether they should only match single words
/// - Whether to include any whitespace on its left or right
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, StructDisplay)]
pub struct AddedToken {
/// The content of the added token
pub content: String,
Expand Down Expand Up @@ -140,7 +141,7 @@ fn space_rightmost_at_start(sentence: &str) -> usize {
/// exist as required.
///
#[derive(Clone, Debug, Display)]
#[display(fmt="AddedVocabulary(added_tokens_map_r={:#?}, encode_special_tokens={})", "added_tokens_map_r", encode_special_tokens)]
#[display(fmt="AddedVocabulary(added_tokens_map_r={{0:{}}}, encode_special_tokens={})", "added_tokens_map_r.get(&0).unwrap()", encode_special_tokens)]
pub struct AddedVocabulary {
/// Contains the mapping from String (token content) to ID. This map contains both special
/// tokens and classic added tokens that were added to this vocabulary.
Expand Down

0 comments on commit f43cefc

Please sign in to comment.