Add custom tokenizers
This commit is contained in:
@@ -576,4 +576,5 @@ export const languageEnglish = {
|
||||
autoContinueMinTokens: "Target Tokens (Auto Continue)",
|
||||
autoContinueChat: "Anti-Incomplete Response (Auto Continue)",
|
||||
removeIncompleteResponse: "Remove Incomplete Sentences",
|
||||
tokenizer: "Tokenizer",
|
||||
}
|
||||
@@ -5,7 +5,7 @@
|
||||
import { DataBase } from "src/ts/storage/database";
|
||||
import { customProviderStore, getCurrentPluginMax } from "src/ts/plugins/plugins";
|
||||
import { getModelMaxContext, isTauri } from "src/ts/storage/globalApi";
|
||||
import { tokenize, tokenizeAccurate } from "src/ts/tokenizer";
|
||||
import { tokenize, tokenizeAccurate, tokenizerList } from "src/ts/tokenizer";
|
||||
import ModelList from "src/lib/UI/ModelList.svelte";
|
||||
import DropList from "src/lib/SideBars/DropList.svelte";
|
||||
import { PlusIcon, TrashIcon } from "lucide-svelte";
|
||||
@@ -242,6 +242,14 @@
|
||||
</SelectInput>
|
||||
{/await}
|
||||
{/if}
|
||||
{#if $DataBase.aiModel === 'openrouter' || $DataBase.aiModel === 'reverse_proxy'}
|
||||
<span class="text-textcolor">{language.tokenizer}</span>
|
||||
<SelectInput bind:value={$DataBase.customTokenizer}>
|
||||
{#each tokenizerList as entry}
|
||||
<OptionInput value={entry[0]}>{entry[1]}</OptionInput>
|
||||
{/each}
|
||||
</SelectInput>
|
||||
{/if}
|
||||
{#if $DataBase.aiModel.startsWith('gpt') || $DataBase.subModel.startsWith('gpt')
|
||||
|| $DataBase.aiModel.startsWith('instructgpt') || $DataBase.subModel.startsWith('instructgpt')}
|
||||
<span class="text-textcolor">OpenAI {language.apiKey} <Help key="oaiapikey"/></span>
|
||||
@@ -254,10 +262,10 @@
|
||||
</div>
|
||||
{/if}
|
||||
{#if $DataBase.aiModel.startsWith('openrouter')}
|
||||
<div class="flex items-center">
|
||||
<div class="flex items-center mb-4">
|
||||
<Check bind:check={$DataBase.openrouterFallback} name={language.openrouterFallback}/>
|
||||
</div>
|
||||
<div class="flex items-center">
|
||||
<div class="flex items-center mb-4">
|
||||
<Check bind:check={$DataBase.openrouterMiddleOut} name={language.openrouterMiddleOut}/>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
@@ -390,6 +390,7 @@ export function setDatabase(data:Database){
|
||||
data.repetition_penalty ??= 1
|
||||
data.min_p ??= 0
|
||||
data.top_a ??= 0
|
||||
data.customTokenizer ??= 'tik'
|
||||
|
||||
changeLanguage(data.language)
|
||||
DataBase.set(data)
|
||||
@@ -633,6 +634,7 @@ export interface Database{
|
||||
autoContinueChat:boolean
|
||||
autoContinueMinTokens:number
|
||||
removeIncompleteResponse:boolean
|
||||
customTokenizer:string
|
||||
}
|
||||
|
||||
export interface customscript{
|
||||
|
||||
@@ -7,8 +7,34 @@ import { supportsInlayImage } from "./process/files/image";
|
||||
import { risuChatParser } from "./parser";
|
||||
import { tokenizeGGUFModel } from "./process/models/local";
|
||||
|
||||
|
||||
/**
 * Tokenizers the user can pick from in the settings UI.
 *
 * Each entry is an `[id, label]` pair: the first element is the value
 * stored in the `customTokenizer` setting, the second is the text shown
 * for that option. `as const` keeps the entries as readonly literal tuples.
 */
export const tokenizerList = [
|
||||
['tik', 'Tiktoken (OpenAI)'],
|
||||
['mistral', 'Mistral'],
|
||||
['novelai', 'NovelAI'],
|
||||
['claude', 'Claude'],
|
||||
['llama', 'Llama'],
|
||||
['novellist', 'Novellist'],
|
||||
] as const
|
||||
|
||||
async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{
|
||||
let db = get(DataBase)
|
||||
if(db.aiModel === 'openrouter' || db.aiModel === 'reverse_proxy'){
|
||||
switch(db.customTokenizer){
|
||||
case 'mistral':
|
||||
return await tokenizeWebTokenizers(data, 'mistral')
|
||||
case 'llama':
|
||||
return await tokenizeWebTokenizers(data, 'llama')
|
||||
case 'novelai':
|
||||
return await tokenizeWebTokenizers(data, 'novelai')
|
||||
case 'claude':
|
||||
return await tokenizeWebTokenizers(data, 'claude')
|
||||
case 'novellist':
|
||||
return await tokenizeWebTokenizers(data, 'novellist')
|
||||
default:
|
||||
return await tikJS(data)
|
||||
}
|
||||
}
|
||||
if(db.aiModel.startsWith('novellist')){
|
||||
const nv= await tokenizeWebTokenizers(data, 'novellist')
|
||||
return nv
|
||||
|
||||
Reference in New Issue
Block a user