feat: Add support for Cohere AI model in tokenizer

This commit is contained in:
kwaroran
2024-05-28 02:30:43 +09:00
parent b89890594b
commit b94323510d
2 changed files with 508779 additions and 1 deletion

File diff suppressed because it is too large. Load Diff

View File

@@ -82,11 +82,14 @@ export async function encode(data:string):Promise<(number[]|Uint32Array|Int32Arr
if(db.aiModel.startsWith('gemini')){ if(db.aiModel.startsWith('gemini')){
return await tokenizeWebTokenizers(data, 'gemma') return await tokenizeWebTokenizers(data, 'gemma')
} }
if(db.aiModel.startsWith('cohere')){
return await tokenizeWebTokenizers(data, 'cohere')
}
return await tikJS(data) return await tikJS(data)
} }
type tokenizerType = 'novellist'|'claude'|'novelai'|'llama'|'mistral'|'llama3'|'gemma' type tokenizerType = 'novellist'|'claude'|'novelai'|'llama'|'mistral'|'llama3'|'gemma'|'cohere'
let tikParser:Tiktoken = null let tikParser:Tiktoken = null
let tokenizersTokenizer:Tokenizer = null let tokenizersTokenizer:Tokenizer = null
@@ -164,6 +167,11 @@ async function tokenizeWebTokenizers(text:string, type:tokenizerType) {
await (await fetch("/token/llama/llama3.json") await (await fetch("/token/llama/llama3.json")
).arrayBuffer()) ).arrayBuffer())
break break
case 'cohere':
tokenizersTokenizer = await webTokenizer.Tokenizer.fromJSON(
await (await fetch("/token/cohere/tokenizer.json")
).arrayBuffer())
break
case 'novelai': case 'novelai':
tokenizersTokenizer = await webTokenizer.Tokenizer.fromSentencePiece( tokenizersTokenizer = await webTokenizer.Tokenizer.fromSentencePiece(
await (await fetch("/token/nai/nerdstash_v2.model") await (await fetch("/token/nai/nerdstash_v2.model")