From 74f3ed46d6bedbe879944647f807bd175f9ee98b Mon Sep 17 00:00:00 2001 From: ZachNagengast Date: Sun, 28 Apr 2024 11:27:51 -0700 Subject: [PATCH] Handle NSNumber score values --- Sources/Tokenizers/UnigramTokenizer.swift | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/Sources/Tokenizers/UnigramTokenizer.swift b/Sources/Tokenizers/UnigramTokenizer.swift index 2fe754d..2ac672b 100644 --- a/Sources/Tokenizers/UnigramTokenizer.swift +++ b/Sources/Tokenizers/UnigramTokenizer.swift @@ -6,6 +6,7 @@ // Copyright © 2024 Hugging Face. All rights reserved. // +import Foundation import Hub class UnigramTokenizer: PreTrainedTokenizerModel { @@ -37,8 +38,20 @@ class UnigramTokenizer: PreTrainedTokenizerModel { } vocab = try configVocab.map { piece in - guard let token = piece.first as? String else { throw TokenizerError.malformedVocab } - guard let score = piece.last as? Float else { throw TokenizerError.malformedVocab } + guard let token = piece.first as? String, + let scoreValue = piece.last else { + throw TokenizerError.malformedVocab + } + + let score: Float + if let floatScore = scoreValue as? Float { + score = floatScore + } else if let numberScore = scoreValue as? NSNumber { + score = numberScore.floatValue + } else { + throw TokenizerError.malformedVocab + } + return SentencePieceToken(token: token, score: score) }