Add custom tokenizers

This commit is contained in:
kwaroran
2024-04-19 13:35:56 +09:00
parent b291242830
commit da272d83d8
4 changed files with 40 additions and 3 deletions

View File

@@ -390,6 +390,7 @@ export function setDatabase(data:Database){
data.repetition_penalty ??= 1
data.min_p ??= 0
data.top_a ??= 0
data.customTokenizer ??= 'tik'
changeLanguage(data.language)
DataBase.set(data)
@@ -633,6 +634,7 @@ export interface Database{
autoContinueChat:boolean
autoContinueMinTokens:number
removeIncompleteResponse:boolean
customTokenizer:string
}
export interface customscript{

View File

@@ -7,8 +7,34 @@ import { supportsInlayImage } from "./process/files/image";
import { risuChatParser } from "./parser";
import { tokenizeGGUFModel } from "./process/models/local";
/**
 * Tokenizers that can be selected as the custom tokenizer.
 *
 * Each entry is a `[id, label]` pair:
 * - `id` is the value stored in `customTokenizer` and matched by `encode`.
 * - `label` is the accompanying name (presumably shown to the user in the
 *   settings UI; confirm against the consuming component).
 *
 * Declared `as const` so the ids stay narrow string literals.
 */
export const tokenizerList = [
    ['tik', 'Tiktoken (OpenAI)'],
    ['mistral', 'Mistral'],
    ['novelai', 'NovelAI'],
    ['claude', 'Claude'],
    ['llama', 'Llama'],
    ['novellist', 'Novellist'],
] as const
async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{
let db = get(DataBase)
if(db.aiModel === 'openrouter' || db.aiModel === 'reverse_proxy'){
switch(db.customTokenizer){
case 'mistral':
return await tokenizeWebTokenizers(data, 'mistral')
case 'llama':
return await tokenizeWebTokenizers(data, 'llama')
case 'novelai':
return await tokenizeWebTokenizers(data, 'novelai')
case 'claude':
return await tokenizeWebTokenizers(data, 'claude')
case 'novellist':
return await tokenizeWebTokenizers(data, 'novellist')
default:
return await tikJS(data)
}
}
if(db.aiModel.startsWith('novellist')){
const nv= await tokenizeWebTokenizers(data, 'novellist')
return nv