[feat] trin & claude tokenizer + novellist support

This commit is contained in:
kwaroran
2023-06-25 21:43:28 +09:00
parent 9851b8647d
commit e0b04784da
7 changed files with 66 additions and 3 deletions

View File

@@ -79,6 +79,8 @@
<button class="hover:bg-selected px-6 py-2 text-lg" on:click={() => {changeModel('textgen_webui')}}>Oobabooga WebUI</button>
<button class="hover:bg-selected px-6 py-2 text-lg" on:click={() => {changeModel('palm2')}}>Google PaLM2</button>
<button class="hover:bg-selected px-6 py-2 text-lg" on:click={() => {changeModel('kobold')}}>Kobold</button>
<button class="hover:bg-selected px-6 py-2 text-lg" on:click={() => {changeModel('novellist')}}>Novellist</button>
{#if isTauri ||isNodeServer}
<button class="hover:bg-selected px-6 py-2 text-lg" on:click={() => {changeModel('novelai')}}>NovelAI Clio</button>
<button class="hover:bg-selected px-6 py-2 text-lg" on:click={() => {changeModel('deepai')}}>DeepAI</button>

View File

@@ -586,6 +586,13 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'
}
}
if(response.data.error){
return {
'type': 'fail',
'result': `${response.data.error.replace("token", "api key")}`
}
}
const result = response.data.data[0];
return {

View File

@@ -1,18 +1,26 @@
import type { Tiktoken } from "@dqbd/tiktoken";
import type { Tokenizer } from "@mlc-ai/web-tokenizers";
import { DataBase, type character } from "./storage/database";
import { get } from "svelte/store";
import { tokenizeTransformers } from "./transformers/transformer";
import type { OpenAIChat } from "./process";
/**
 * Tokenizes `data` with the tokenizer that matches the model currently
 * selected in the database store.
 *
 * - `novellist` models use the 'novellist' web-tokenizers tokenizer.
 * - models whose id starts with `claude` use the 'claude' web-tokenizers tokenizer.
 * - every other model falls back to the tiktoken-based `tikJS`.
 *
 * @param data Text to tokenize.
 * @returns The encoded tokens; callers in this file only rely on `.length`.
 */
async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{
    const db = get(DataBase)
    if(db.aiModel === 'novellist'){
        return await tokenizeWebTokenizers(data, 'novellist')
    }
    if(db.aiModel.startsWith('claude')){
        return await tokenizeWebTokenizers(data, 'claude')
    }
    return await tikJS(data)
}
// Tokenizer kinds that tokenizeWebTokenizers knows how to load.
type tokenizerType = 'novellist'|'claude'
// Tiktoken encoder used by tikJS; starts as null and is checked for truthiness before use.
let tikParser:Tiktoken = null
// Tokenizer instance kept between tokenizeWebTokenizers calls so it is not rebuilt every time.
let tokenizersTokenizer:Tokenizer = null
// The tokenizerType that tokenizersTokenizer was last built for; a request for a different type triggers a reload.
let tokenizersType:tokenizerType = null
async function tikJS(text:string) {
if(!tikParser){
@@ -28,6 +36,24 @@ async function tikJS(text:string) {
return tikParser.encode(text)
}
/**
 * Encodes `text` with a tokenizer from `@mlc-ai/web-tokenizers`.
 *
 * The tokenizer is built on first use and kept in the module-level
 * `tokenizersTokenizer`; it is rebuilt only when a different `type` is
 * requested than the one recorded in `tokenizersType`.
 *
 * @param text Text to encode.
 * @param type Which tokenizer to use: 'novellist' loads the SentencePiece
 *             model at /token/trin/spiece.model, 'claude' loads the JSON
 *             definition at /token/claude/claude.json.
 * @returns The result of `Tokenizer.encode(text)`.
 */
async function tokenizeWebTokenizers(text:string, type:tokenizerType) {
    if(type !== tokenizersType || !tokenizersTokenizer){
        // Loaded lazily so the tokenizer package is only fetched when actually needed.
        const webTokenizer = await import('@mlc-ai/web-tokenizers')
        switch(type){
            case "novellist":
                tokenizersTokenizer = await webTokenizer.Tokenizer.fromSentencePiece(
                    await (await fetch("/token/trin/spiece.model")
                ).arrayBuffer())
                // Without this break the case fell through and the Claude
                // tokenizer replaced the one that was just loaded.
                break
            case "claude":
                tokenizersTokenizer = await webTokenizer.Tokenizer.fromJSON(
                    await (await fetch("/token/claude/claude.json")
                ).arrayBuffer())
                break
        }
        tokenizersType = type
    }
    return (tokenizersTokenizer.encode(text))
}
export async function tokenizerChar(char:character) {
const encoded = await encode(char.name + '\n' + char.firstMessage + '\n' + char.desc)
return encoded.length