This commit is contained in:
kwaroran
2025-04-28 15:47:11 +09:00
24 changed files with 400 additions and 206 deletions

View File

@@ -55,7 +55,8 @@ export async function loadLoreBookV3Prompt(){
const recursiveScanning = char.loreSettings?.recursiveScanning ?? true
let recursivePrompt:{
prompt: string,
source: string
source: string,
data: string
}[] = []
let matchLog:{
prompt: string,
@@ -75,23 +76,27 @@ export async function loadLoreBookV3Prompt(){
let mList:{
source:string
prompt:string
data:string
}[] = sliced.map((msg, i) => {
if(msg.role === 'user'){
return {
source: `message ${i} by user`,
prompt: `\x01{{${DBState.db.username}}}:` + msg.data + '\x01'
prompt: `\x01{{${DBState.db.username}}}:` + msg.data + '\x01',
data: msg.data
}
}
else{
return {
source: `message ${i} by char`,
prompt: `\x01{{${msg.name ?? (msg.saying ? findCharacterbyId(msg.saying)?.name : null) ?? char.name}}}:` + msg.data + '\x01'
prompt: `\x01{{${msg.name ?? (msg.saying ? findCharacterbyId(msg.saying)?.name : null) ?? char.name}}}:` + msg.data + '\x01',
data: msg.data
}
}
}).concat(recursivePrompt.map((msg) => {
return {
source: 'lorebook ' + msg.source,
prompt: msg.prompt
prompt: msg.prompt,
data: msg.data
}
}))
@@ -106,7 +111,7 @@ export async function loadLoreBookV3Prompt(){
arg.keys[0] = regexString.replace('/'+regexFlag,'')
try {
const regex = new RegExp(arg.keys[0],regexFlag)
const d = regex.test(mText.prompt)
const d = regex.test(mText.data)
if(d){
matchLog.push({
prompt: mText.prompt,
@@ -127,7 +132,8 @@ export async function loadLoreBookV3Prompt(){
mList = mList.map((m) => {
return {
source: m.source,
prompt: m.prompt.toLocaleLowerCase().replace(/\{\{\/\/(.+?)\}\}/g,'').replace(/\{\{comment:(.+?)\}\}/g,'')
prompt: m.prompt.toLocaleLowerCase().replace(/\{\{\/\/(.+?)\}\}/g,'').replace(/\{\{comment:(.+?)\}\}/g,''),
data: m.data.toLocaleLowerCase().replace(/\{\{\/\/(.+?)\}\}/g,'').replace(/\{\{comment:(.+?)\}\}/g,'')
}
})
@@ -135,7 +141,7 @@ export async function loadLoreBookV3Prompt(){
let allModeMatched = true
for(const m of mList){
let mText = m.prompt
let mText = m.data
if(arg.fullWordMatching){
const splited = mText.split(' ')
for(const key of arg.keys){
@@ -510,7 +516,7 @@ export async function importLoreBook(mode:'global'|'local'|'sglobal'){
}
}
interface CCLorebook{
export interface CCLorebook{
key:string[]
comment:string
content:string

View File

@@ -132,7 +132,7 @@ export async function generateAIImage(genPrompt:string, currentChar:character, n
"parameters": {
"params_version": 3,
"add_original_image": true,
"cfg_rescale": 0,
"cfg_rescale": db.NAIImgConfig.cfg_rescale,
"controlnet_strength": 1,
"dynamic_thresholding": false,
"n_samples": 1,
@@ -145,7 +145,7 @@ export async function generateAIImage(genPrompt:string, currentChar:character, n
"sm": false,
"sm_dyn": false,
"noise": db.NAIImgConfig.noise,
"noise_schedule": "native",
"noise_schedule": db.NAIImgConfig.noise_schedule,
"strength": db.NAIImgConfig.strength,
"ucPreset": 3,
"uncond_scale": 1,
@@ -435,7 +435,7 @@ export async function generateAIImage(genPrompt:string, currentChar:character, n
}
await new Promise(r => setTimeout(r, 1000))
} // Check history until the generation is complete.
const genImgInfo = Object.values(item.outputs).flatMap((output: any) => output.images)[0];
const genImgInfo = Object.values(item.outputs).flatMap((output: any) => output.images || [])[0];
const imgResponse = await fetchNative(createUrl('/view', {
filename: genImgInfo.filename,

View File

@@ -255,8 +255,10 @@ export function setDatabase(data:Database){
width:512,
height:768,
sampler:"k_dpmpp_sde",
noise_schedule:"native",
steps:28,
scale:5,
cfg_rescale: 0,
sm:true,
sm_dyn:false,
noise:0.0,
@@ -1023,6 +1025,7 @@ export interface Database{
}[]
igpPrompt:string
useTokenizerCaching:boolean
showMenuHypaMemoryModal:boolean
}
interface SeparateParameters{
@@ -1408,8 +1411,10 @@ export interface NAIImgConfig{
width:number,
height:number,
sampler:string,
noise_schedule:string,
steps:number,
scale:number,
cfg_rescale:number,
sm:boolean,
sm_dyn:boolean,
noise:number,

View File

@@ -6,9 +6,27 @@ import { supportsInlayImage } from "./process/files/inlays";
import { risuChatParser } from "./parser.svelte";
import { tokenizeGGUFModel } from "./process/models/local";
import { globalFetch } from "./globalApi.svelte";
import { getModelInfo, LLMTokenizer } from "./model/modellist";
import { getModelInfo, LLMTokenizer, type LLMModel } from "./model/modellist";
import { pluginV2 } from "./plugins/plugins";
import type { GemmaTokenizer } from "@huggingface/transformers";
import { LRUMap } from 'mnemonist';
const MAX_CACHE_SIZE = 1500;
const encodeCache = new LRUMap<string, number[] | Uint32Array | Int32Array>(MAX_CACHE_SIZE);
/**
 * Builds the cache key under which `encode` stores the tokens for `data`.
 *
 * Despite the name, nothing is hashed: the key is the full serialized tuple of
 * the text plus every setting that is passed in, so two calls share a key only
 * when all of these values are equal.
 *
 * The fields are serialized as a JSON array rather than joined with a plain
 * delimiter. With a bare `::` join, a delimiter occurring inside one field
 * (for example inside `data`) could make two different tuples produce the same
 * key, and the cache would then hand back tokens for the wrong text/settings.
 *
 * @param data - Text that is about to be tokenized.
 * @param aiModel - Currently selected model id.
 * @param customTokenizer - Tokenizer name configured by the user.
 * @param currentPluginProvider - Id of the active plugin provider.
 * @param googleClaudeTokenizing - Flag that switches the tokenizer used for GoogleCloud models.
 * @param modelInfo - Model description; only its `tokenizer` field is part of the key.
 * @param pluginTokenizer - Tokenizer name reported by the active plugin provider.
 * @returns A string key that is unique per combination of the arguments above.
 */
function getHash(
    data: string,
    aiModel: string,
    customTokenizer: string,
    currentPluginProvider: string,
    googleClaudeTokenizing: boolean,
    modelInfo: LLMModel,
    pluginTokenizer: string
): string {
    // JSON string escaping keeps field boundaries unambiguous for any content.
    return JSON.stringify([
        data,
        aiModel,
        customTokenizer,
        currentPluginProvider,
        googleClaudeTokenizing,
        modelInfo.tokenizer,
        pluginTokenizer,
    ]);
}
export const tokenizerList = [
@@ -25,100 +43,114 @@ export const tokenizerList = [
] as const
/**
 * Tokenizes `data` with the tokenizer selected by the current database settings
 * and resolves to the resulting token ids.
 *
 * Selection order visible below: a user-chosen `db.customTokenizer` for the
 * 'openrouter' / 'reverse_proxy' models, then the plugin provider's tokenizer
 * for the 'custom' model, then the tokenizer declared by `modelInfo`, and
 * finally `tikJS` with its default model.
 *
 * In the lines that work with `result`, when `db.useTokenizerCaching` is set a
 * key from `getHash` is looked up in `encodeCache` first, and the computed
 * tokens are stored under that key before returning.
 *
 * NOTE(review): this span appears to contain both the removed and the added
 * lines of a change side by side (for example `db` is declared twice right
 * below, and `return ...` lines sit next to `result = ...; break;` lines) —
 * confirm which lines are live before relying on it.
 */
export async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{
let db = getDatabase()
const db = getDatabase();
const modelInfo = getModelInfo(db.aiModel);
// Falls back to the string "none" when the provider exposes no tokenizer option.
const pluginTokenizer = pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer ?? "none";
let cacheKey = ''
if(db.useTokenizerCaching){
cacheKey = getHash(
data,
db.aiModel,
db.customTokenizer,
db.currentPluginProvider,
db.googleClaudeTokenizing,
modelInfo,
pluginTokenizer
);
const cachedResult = encodeCache.get(cacheKey);
// Cache hit: hand back the stored tokens without tokenizing again.
if (cachedResult !== undefined) {
return cachedResult;
}
}
let result: number[] | Uint32Array | Int32Array;
if(db.aiModel === 'openrouter' || db.aiModel === 'reverse_proxy'){
switch(db.customTokenizer){
case 'mistral':
return await tokenizeWebTokenizers(data, 'mistral')
result = await tokenizeWebTokenizers(data, 'mistral'); break;
case 'llama':
return await tokenizeWebTokenizers(data, 'llama')
result = await tokenizeWebTokenizers(data, 'llama'); break;
case 'novelai':
return await tokenizeWebTokenizers(data, 'novelai')
result = await tokenizeWebTokenizers(data, 'novelai'); break;
case 'claude':
return await tokenizeWebTokenizers(data, 'claude')
result = await tokenizeWebTokenizers(data, 'claude'); break;
case 'novellist':
return await tokenizeWebTokenizers(data, 'novellist')
result = await tokenizeWebTokenizers(data, 'novellist'); break;
case 'llama3':
return await tokenizeWebTokenizers(data, 'llama')
result = await tokenizeWebTokenizers(data, 'llama'); break;
case 'gemma':
return await gemmaTokenize(data)
result = await gemmaTokenize(data); break;
case 'cohere':
return await tokenizeWebTokenizers(data, 'cohere')
result = await tokenizeWebTokenizers(data, 'cohere'); break;
case 'deepseek':
return await tokenizeWebTokenizers(data, 'DeepSeek')
result = await tokenizeWebTokenizers(data, 'DeepSeek'); break;
default:
return await tikJS(data, 'o200k_base')
result = await tikJS(data, 'o200k_base'); break;
}
}
const modelInfo = getModelInfo(db.aiModel)
if(db.aiModel === 'custom' && pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer){
const tokenizer = pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer
switch(tokenizer){
// NOTE(review): pluginTokenizer defaults to "none" above, which is truthy, so
// the check below reduces to db.aiModel === 'custom'; a provider without a
// tokenizer then lands in the o200k_base default of this switch instead of the
// modelInfo-based fallback further down — confirm that is intended.
} else if (db.aiModel === 'custom' && pluginTokenizer) {
switch(pluginTokenizer){
case 'mistral':
return await tokenizeWebTokenizers(data, 'mistral')
result = await tokenizeWebTokenizers(data, 'mistral'); break;
case 'llama':
return await tokenizeWebTokenizers(data, 'llama')
result = await tokenizeWebTokenizers(data, 'llama'); break;
case 'novelai':
return await tokenizeWebTokenizers(data, 'novelai')
result = await tokenizeWebTokenizers(data, 'novelai'); break;
case 'claude':
return await tokenizeWebTokenizers(data, 'claude')
result = await tokenizeWebTokenizers(data, 'claude'); break;
case 'novellist':
return await tokenizeWebTokenizers(data, 'novellist')
result = await tokenizeWebTokenizers(data, 'novellist'); break;
case 'llama3':
return await tokenizeWebTokenizers(data, 'llama')
result = await tokenizeWebTokenizers(data, 'llama'); break;
case 'gemma':
return await gemmaTokenize(data)
result = await gemmaTokenize(data); break;
case 'cohere':
return await tokenizeWebTokenizers(data, 'cohere')
result = await tokenizeWebTokenizers(data, 'cohere'); break;
case 'o200k_base':
return await tikJS(data, 'o200k_base')
result = await tikJS(data, 'o200k_base'); break;
case 'cl100k_base':
return await tikJS(data, 'cl100k_base')
result = await tikJS(data, 'cl100k_base'); break;
case 'custom':
return await pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizerFunc?.(data) ?? [0]
result = await pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizerFunc?.(data) ?? [0]; break;
default:
return await tikJS(data, 'o200k_base')
result = await tikJS(data, 'o200k_base'); break;
}
}
// Fallback
// Runs only when neither branch above assigned `result`; picks the tokenizer from modelInfo.
if (result === undefined) {
if(modelInfo.tokenizer === LLMTokenizer.NovelList){
result = await tokenizeWebTokenizers(data, 'novellist');
} else if(modelInfo.tokenizer === LLMTokenizer.Claude){
result = await tokenizeWebTokenizers(data, 'claude');
} else if(modelInfo.tokenizer === LLMTokenizer.NovelAI){
result = await tokenizeWebTokenizers(data, 'novelai');
} else if(modelInfo.tokenizer === LLMTokenizer.Mistral){
result = await tokenizeWebTokenizers(data, 'mistral');
} else if(modelInfo.tokenizer === LLMTokenizer.Llama){
result = await tokenizeWebTokenizers(data, 'llama');
} else if(modelInfo.tokenizer === LLMTokenizer.Local){
result = await tokenizeGGUFModel(data);
} else if(modelInfo.tokenizer === LLMTokenizer.tiktokenO200Base){
result = await tikJS(data, 'o200k_base');
// GoogleCloud models call tokenizeGoogleCloud only when googleClaudeTokenizing is on;
// otherwise they fall through to gemmaTokenize in the next branch.
} else if(modelInfo.tokenizer === LLMTokenizer.GoogleCloud && db.googleClaudeTokenizing){
result = await tokenizeGoogleCloud(data);
} else if(modelInfo.tokenizer === LLMTokenizer.Gemma || modelInfo.tokenizer === LLMTokenizer.GoogleCloud){
result = await gemmaTokenize(data);
} else if(modelInfo.tokenizer === LLMTokenizer.DeepSeek){
result = await tokenizeWebTokenizers(data, 'DeepSeek');
} else if(modelInfo.tokenizer === LLMTokenizer.Cohere){
result = await tokenizeWebTokenizers(data, 'cohere');
} else {
result = await tikJS(data);
}
}
if(modelInfo.tokenizer === LLMTokenizer.NovelList){
const nv= await tokenizeWebTokenizers(data, 'novellist')
return nv
}
if(modelInfo.tokenizer === LLMTokenizer.Claude){
return await tokenizeWebTokenizers(data, 'claude')
}
if(modelInfo.tokenizer === LLMTokenizer.NovelAI){
return await tokenizeWebTokenizers(data, 'novelai')
}
if(modelInfo.tokenizer === LLMTokenizer.Mistral){
return await tokenizeWebTokenizers(data, 'mistral')
}
if(modelInfo.tokenizer === LLMTokenizer.Llama){
return await tokenizeWebTokenizers(data, 'llama')
}
if(modelInfo.tokenizer === LLMTokenizer.Local){
return await tokenizeGGUFModel(data)
}
if(modelInfo.tokenizer === LLMTokenizer.tiktokenO200Base){
return await tikJS(data, 'o200k_base')
}
if(modelInfo.tokenizer === LLMTokenizer.GoogleCloud && db.googleClaudeTokenizing){
return await tokenizeGoogleCloud(data)
}
if(modelInfo.tokenizer === LLMTokenizer.Gemma || modelInfo.tokenizer === LLMTokenizer.GoogleCloud){
return await gemmaTokenize(data)
}
if(modelInfo.tokenizer === LLMTokenizer.DeepSeek){
return await tokenizeWebTokenizers(data, 'DeepSeek')
}
if(modelInfo.tokenizer === LLMTokenizer.Cohere){
return await tokenizeWebTokenizers(data, 'cohere')
// Store the freshly computed tokens so a later call with the same key can reuse them.
if(db.useTokenizerCaching){
encodeCache.set(cacheKey, result);
}
return await tikJS(data)
return result;
}
type tokenizerType = 'novellist'|'claude'|'novelai'|'llama'|'mistral'|'llama3'|'gemma'|'cohere'|'googleCloud'|'DeepSeek'
@@ -177,6 +209,7 @@ async function gemmaTokenize(text:string) {
async function tikJS(text:string, model='cl100k_base') {
if(!tikParser || lastTikModel !== model){
tikParser?.free()
if(model === 'cl100k_base'){
const {Tiktoken} = await import('@dqbd/tiktoken')
const cl100k_base = await import("@dqbd/tiktoken/encoders/cl100k_base.json");