[feat] trin & claude tokenizer + novellist support
This commit is contained in:
@@ -79,6 +79,8 @@
|
||||
<button class="hover:bg-selected px-6 py-2 text-lg" on:click={() => {changeModel('textgen_webui')}}>Oobabooga WebUI</button>
|
||||
<button class="hover:bg-selected px-6 py-2 text-lg" on:click={() => {changeModel('palm2')}}>Google PaLM2</button>
|
||||
<button class="hover:bg-selected px-6 py-2 text-lg" on:click={() => {changeModel('kobold')}}>Kobold</button>
|
||||
<button class="hover:bg-selected px-6 py-2 text-lg" on:click={() => {changeModel('novellist')}}>Novellist</button>
|
||||
|
||||
{#if isTauri ||isNodeServer}
|
||||
<button class="hover:bg-selected px-6 py-2 text-lg" on:click={() => {changeModel('novelai')}}>NovelAI Clio</button>
|
||||
<button class="hover:bg-selected px-6 py-2 text-lg" on:click={() => {changeModel('deepai')}}>DeepAI</button>
|
||||
|
||||
@@ -586,6 +586,13 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'
|
||||
}
|
||||
}
|
||||
|
||||
if(response.data.error){
|
||||
return {
|
||||
'type': 'fail',
|
||||
'result': `${response.data.error.replace("token", "api key")}`
|
||||
}
|
||||
}
|
||||
|
||||
const result = response.data.data[0];
|
||||
|
||||
return {
|
||||
|
||||
@@ -1,18 +1,26 @@
|
||||
import type { Tiktoken } from "@dqbd/tiktoken";
|
||||
import type { Tokenizer } from "@mlc-ai/web-tokenizers";
|
||||
|
||||
import { DataBase, type character } from "./storage/database";
|
||||
import { get } from "svelte/store";
|
||||
import { tokenizeTransformers } from "./transformers/transformer";
|
||||
import type { OpenAIChat } from "./process";
|
||||
|
||||
async function encode(data:string):Promise<(number[]|Uint32Array)>{
|
||||
async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{
|
||||
let db = get(DataBase)
|
||||
if(db.aiModel === 'novellist'){
|
||||
return await tokenizeTransformers('naclbit/trin_tokenizer_v3',data)
|
||||
return await tokenizeWebTokenizers(data, 'novellist')
|
||||
}
|
||||
if(db.aiModel.startsWith('claude')){
|
||||
return await tokenizeWebTokenizers(data, 'claude')
|
||||
}
|
||||
return await tikJS(data)
|
||||
}
|
||||
|
||||
type tokenizerType = 'novellist'|'claude'
|
||||
|
||||
let tikParser:Tiktoken = null
|
||||
let tokenizersTokenizer:Tokenizer = null
|
||||
let tokenizersType:tokenizerType = null
|
||||
|
||||
async function tikJS(text:string) {
|
||||
if(!tikParser){
|
||||
@@ -28,6 +36,24 @@ async function tikJS(text:string) {
|
||||
return tikParser.encode(text)
|
||||
}
|
||||
|
||||
/**
 * Encodes `text` with a tokenizer loaded through `@mlc-ai/web-tokenizers`.
 *
 * The loaded tokenizer is kept in the module-level `tokenizersTokenizer`,
 * and the kind it was built for is kept in `tokenizersType`. A tokenizer is
 * (re)loaded only when none is loaded yet or when `type` differs from the
 * one currently cached.
 *
 * @param text - The string to encode.
 * @param type - Which tokenizer to use: `'novellist'` loads the SentencePiece
 *   model at `/token/trin/spiece.model`; `'claude'` loads the JSON tokenizer
 *   at `/token/claude/claude.json`.
 * @returns Whatever `Tokenizer.encode` returns for `text` (presumably the
 *   token ids; confirm against the library's typings).
 */
async function tokenizeWebTokenizers(text:string, type:tokenizerType) {
    if(type !== tokenizersType || !tokenizersTokenizer){
        // Imported lazily so the tokenizer library is only loaded when needed.
        const webTokenizer = await import('@mlc-ai/web-tokenizers')
        switch(type){
            case "novellist":
                tokenizersTokenizer = await webTokenizer.Tokenizer.fromSentencePiece(
                    await (await fetch("/token/trin/spiece.model")
                ).arrayBuffer())
                // Without this break the case fell through and the Claude
                // tokenizer silently replaced the one just loaded.
                break
            case "claude":
                tokenizersTokenizer = await webTokenizer.Tokenizer.fromJSON(
                    await (await fetch("/token/claude/claude.json")
                ).arrayBuffer())
                break
        }
        // Record the kind only after loading, so a failed load is retried
        // on the next call instead of being treated as cached.
        tokenizersType = type
    }
    return (tokenizersTokenizer.encode(text))
}
|
||||
|
||||
export async function tokenizerChar(char:character) {
|
||||
const encoded = await encode(char.name + '\n' + char.firstMessage + '\n' + char.desc)
|
||||
return encoded.length
|
||||
|
||||
Reference in New Issue
Block a user