Add custom tokenizers
This commit is contained in:
@@ -576,4 +576,5 @@ export const languageEnglish = {
|
|||||||
autoContinueMinTokens: "Target Tokens (Auto Continue)",
|
autoContinueMinTokens: "Target Tokens (Auto Continue)",
|
||||||
autoContinueChat: "Anti-Incomplete Response (Auto Continue)",
|
autoContinueChat: "Anti-Incomplete Response (Auto Continue)",
|
||||||
removeIncompleteResponse: "Remove Incomplete Sentences",
|
removeIncompleteResponse: "Remove Incomplete Sentences",
|
||||||
|
tokenizer: "Tokenizer",
|
||||||
}
|
}
|
||||||
@@ -5,7 +5,7 @@
|
|||||||
import { DataBase } from "src/ts/storage/database";
|
import { DataBase } from "src/ts/storage/database";
|
||||||
import { customProviderStore, getCurrentPluginMax } from "src/ts/plugins/plugins";
|
import { customProviderStore, getCurrentPluginMax } from "src/ts/plugins/plugins";
|
||||||
import { getModelMaxContext, isTauri } from "src/ts/storage/globalApi";
|
import { getModelMaxContext, isTauri } from "src/ts/storage/globalApi";
|
||||||
import { tokenize, tokenizeAccurate } from "src/ts/tokenizer";
|
import { tokenize, tokenizeAccurate, tokenizerList } from "src/ts/tokenizer";
|
||||||
import ModelList from "src/lib/UI/ModelList.svelte";
|
import ModelList from "src/lib/UI/ModelList.svelte";
|
||||||
import DropList from "src/lib/SideBars/DropList.svelte";
|
import DropList from "src/lib/SideBars/DropList.svelte";
|
||||||
import { PlusIcon, TrashIcon } from "lucide-svelte";
|
import { PlusIcon, TrashIcon } from "lucide-svelte";
|
||||||
@@ -242,6 +242,14 @@
|
|||||||
</SelectInput>
|
</SelectInput>
|
||||||
{/await}
|
{/await}
|
||||||
{/if}
|
{/if}
|
||||||
|
{#if $DataBase.aiModel === 'openrouter' || $DataBase.aiModel === 'reverse_proxy'}
|
||||||
|
<span class="text-textcolor">{language.tokenizer}</span>
|
||||||
|
<SelectInput bind:value={$DataBase.customTokenizer}>
|
||||||
|
{#each tokenizerList as entry}
|
||||||
|
<OptionInput value={entry[0]}>{entry[1]}</OptionInput>
|
||||||
|
{/each}
|
||||||
|
</SelectInput>
|
||||||
|
{/if}
|
||||||
{#if $DataBase.aiModel.startsWith('gpt') || $DataBase.subModel.startsWith('gpt')
|
{#if $DataBase.aiModel.startsWith('gpt') || $DataBase.subModel.startsWith('gpt')
|
||||||
|| $DataBase.aiModel.startsWith('instructgpt') || $DataBase.subModel.startsWith('instructgpt')}
|
|| $DataBase.aiModel.startsWith('instructgpt') || $DataBase.subModel.startsWith('instructgpt')}
|
||||||
<span class="text-textcolor">OpenAI {language.apiKey} <Help key="oaiapikey"/></span>
|
<span class="text-textcolor">OpenAI {language.apiKey} <Help key="oaiapikey"/></span>
|
||||||
@@ -254,10 +262,10 @@
|
|||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
{#if $DataBase.aiModel.startsWith('openrouter')}
|
{#if $DataBase.aiModel.startsWith('openrouter')}
|
||||||
<div class="flex items-center">
|
<div class="flex items-center mb-4">
|
||||||
<Check bind:check={$DataBase.openrouterFallback} name={language.openrouterFallback}/>
|
<Check bind:check={$DataBase.openrouterFallback} name={language.openrouterFallback}/>
|
||||||
</div>
|
</div>
|
||||||
<div class="flex items-center">
|
<div class="flex items-center mb-4">
|
||||||
<Check bind:check={$DataBase.openrouterMiddleOut} name={language.openrouterMiddleOut}/>
|
<Check bind:check={$DataBase.openrouterMiddleOut} name={language.openrouterMiddleOut}/>
|
||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
|
|||||||
@@ -390,6 +390,7 @@ export function setDatabase(data:Database){
|
|||||||
data.repetition_penalty ??= 1
|
data.repetition_penalty ??= 1
|
||||||
data.min_p ??= 0
|
data.min_p ??= 0
|
||||||
data.top_a ??= 0
|
data.top_a ??= 0
|
||||||
|
data.customTokenizer ??= 'tik'
|
||||||
|
|
||||||
changeLanguage(data.language)
|
changeLanguage(data.language)
|
||||||
DataBase.set(data)
|
DataBase.set(data)
|
||||||
@@ -633,6 +634,7 @@ export interface Database{
|
|||||||
autoContinueChat:boolean
|
autoContinueChat:boolean
|
||||||
autoContinueMinTokens:number
|
autoContinueMinTokens:number
|
||||||
removeIncompleteResponse:boolean
|
removeIncompleteResponse:boolean
|
||||||
|
customTokenizer:string
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface customscript{
|
export interface customscript{
|
||||||
|
|||||||
@@ -7,8 +7,34 @@ import { supportsInlayImage } from "./process/files/image";
|
|||||||
import { risuChatParser } from "./parser";
|
import { risuChatParser } from "./parser";
|
||||||
import { tokenizeGGUFModel } from "./process/models/local";
|
import { tokenizeGGUFModel } from "./process/models/local";
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Tokenizers the user can pick from, as `[id, label]` pairs.
 *
 * The settings dropdown iterates this list, using the first element of each
 * pair as the option value (bound to `customTokenizer`) and the second element
 * as the visible option text. `encode` switches on the same ids when the model
 * is `openrouter` or `reverse_proxy`; `'tik'` has no dedicated case there and
 * is handled by the `default` branch, which calls `tikJS`.
 *
 * `as const` keeps the entries as readonly literal tuples instead of widening
 * them to `string[][]`.
 */
export const tokenizerList = [
|
||||||
|
['tik', 'Tiktoken (OpenAI)'],
|
||||||
|
['mistral', 'Mistral'],
|
||||||
|
['novelai', 'NovelAI'],
|
||||||
|
['claude', 'Claude'],
|
||||||
|
['llama', 'Llama'],
|
||||||
|
['novellist', 'Novellist'],
|
||||||
|
] as const
|
||||||
|
|
||||||
async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{
|
async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{
|
||||||
let db = get(DataBase)
|
let db = get(DataBase)
|
||||||
|
if(db.aiModel === 'openrouter' || db.aiModel === 'reverse_proxy'){
|
||||||
|
switch(db.customTokenizer){
|
||||||
|
case 'mistral':
|
||||||
|
return await tokenizeWebTokenizers(data, 'mistral')
|
||||||
|
case 'llama':
|
||||||
|
return await tokenizeWebTokenizers(data, 'llama')
|
||||||
|
case 'novelai':
|
||||||
|
return await tokenizeWebTokenizers(data, 'novelai')
|
||||||
|
case 'claude':
|
||||||
|
return await tokenizeWebTokenizers(data, 'claude')
|
||||||
|
case 'novellist':
|
||||||
|
return await tokenizeWebTokenizers(data, 'novellist')
|
||||||
|
default:
|
||||||
|
return await tikJS(data)
|
||||||
|
}
|
||||||
|
}
|
||||||
if(db.aiModel.startsWith('novellist')){
|
if(db.aiModel.startsWith('novellist')){
|
||||||
const nv= await tokenizeWebTokenizers(data, 'novellist')
|
const nv= await tokenizeWebTokenizers(data, 'novellist')
|
||||||
return nv
|
return nv
|
||||||
|
|||||||
Reference in New Issue
Block a user