Add LoadingStatusState and improve tokenizer functionality
This commit is contained in:
@@ -8,6 +8,7 @@ import { tokenizeGGUFModel } from "./process/models/local";
|
||||
import { globalFetch } from "./globalApi.svelte";
|
||||
import { getModelInfo, LLMTokenizer } from "./model/modellist";
|
||||
import { pluginV2 } from "./plugins/plugins";
|
||||
import type { GemmaTokenizer } from "@huggingface/transformers";
|
||||
|
||||
|
||||
export const tokenizerList = [
|
||||
@@ -39,7 +40,7 @@ export async function encode(data:string):Promise<(number[]|Uint32Array|Int32Arr
|
||||
case 'llama3':
|
||||
return await tokenizeWebTokenizers(data, 'llama')
|
||||
case 'gemma':
|
||||
return await tokenizeWebTokenizers(data, 'gemma')
|
||||
return await gemmaTokenize(data)
|
||||
case 'cohere':
|
||||
return await tokenizeWebTokenizers(data, 'cohere')
|
||||
default:
|
||||
@@ -65,7 +66,7 @@ export async function encode(data:string):Promise<(number[]|Uint32Array|Int32Arr
|
||||
case 'llama3':
|
||||
return await tokenizeWebTokenizers(data, 'llama')
|
||||
case 'gemma':
|
||||
return await tokenizeWebTokenizers(data, 'gemma')
|
||||
return await gemmaTokenize(data)
|
||||
case 'cohere':
|
||||
return await tokenizeWebTokenizers(data, 'cohere')
|
||||
case 'o200k_base':
|
||||
@@ -105,7 +106,7 @@ export async function encode(data:string):Promise<(number[]|Uint32Array|Int32Arr
|
||||
return await tokenizeGoogleCloud(data)
|
||||
}
|
||||
if(modelInfo.tokenizer === LLMTokenizer.Gemma || modelInfo.tokenizer === LLMTokenizer.GoogleCloud){
|
||||
return await tokenizeWebTokenizers(data, 'gemma')
|
||||
return await gemmaTokenize(data)
|
||||
}
|
||||
if(modelInfo.tokenizer === LLMTokenizer.Cohere){
|
||||
return await tokenizeWebTokenizers(data, 'cohere')
|
||||
@@ -157,6 +158,17 @@ async function tokenizeGoogleCloud(text:string) {
|
||||
return new Uint32Array(count)
|
||||
}
|
||||
|
||||
/** Shared tokenizer instance; stays `null` until the first successful load. */
let gemmaTokenizer:GemmaTokenizer | null = null

/** Load currently in flight, shared so concurrent first calls do not each build a tokenizer. */
let gemmaTokenizerLoading:Promise<GemmaTokenizer> | null = null

/**
 * Imports the tokenizer class on demand, downloads the tokenizer definition
 * and constructs a new tokenizer from it with an empty config object.
 *
 * @returns The newly constructed tokenizer.
 * @throws Error when the tokenizer definition request does not succeed.
 */
async function loadGemmaTokenizer():Promise<GemmaTokenizer> {
    const {GemmaTokenizer} = await import('@huggingface/transformers')
    // NOTE(review): this path points at a file under `llama/` although the
    // tokenizer built from it is a GemmaTokenizer — confirm this is the
    // intended asset and not a copy/paste leftover.
    const response = await fetch("/token/llama/llama3.json")
    if(!response.ok){
        // Fail with the HTTP status instead of an opaque JSON parse error.
        throw new Error(`Failed to load tokenizer definition: ${response.status} ${response.statusText}`)
    }
    return new GemmaTokenizer(await response.json(), {})
}

/**
 * Encodes `text` with the shared tokenizer, creating it on first use.
 *
 * The tokenizer is cached after the first successful load. If loading fails
 * nothing is cached, so a later call tries again.
 *
 * @param text - The string to encode.
 * @returns Whatever the tokenizer's `encode` returns for `text`.
 * @throws Error when the tokenizer cannot be loaded.
 */
async function gemmaTokenize(text:string) {
    let tokenizer = gemmaTokenizer
    if(!tokenizer){
        const pending = gemmaTokenizerLoading ?? loadGemmaTokenizer()
        gemmaTokenizerLoading = pending
        try{
            tokenizer = await pending
            gemmaTokenizer = tokenizer
        }
        finally{
            // Only clear our own load; a newer retry may already be in flight.
            if(gemmaTokenizerLoading === pending){
                gemmaTokenizerLoading = null
            }
        }
    }
    return tokenizer.encode(text)
}
|
||||
|
||||
async function tikJS(text:string, model='cl100k_base') {
|
||||
if(!tikParser || lastTikModel !== model){
|
||||
if(model === 'cl100k_base'){
|
||||
|
||||
Reference in New Issue
Block a user