Merge branch 'main' into lualore

kwaroran
2025-05-17 01:09:37 +09:00
committed by GitHub
24 changed files with 2573 additions and 682 deletions

View File

@@ -10,7 +10,7 @@ export interface alertData{
type: 'error'|'normal'|'none'|'ask'|'wait'|'selectChar'
|'input'|'toast'|'wait2'|'markdown'|'select'|'login'
|'tos'|'cardexport'|'requestdata'|'addchar'|'hypaV2'|'selectModule'
|'chatOptions'|'pukmakkurit'|'branches'|'hypaV3'|'progress',
|'chatOptions'|'pukmakkurit'|'branches'|'progress',
msg: string,
submsg?: string
}
@@ -319,10 +319,3 @@ export function showHypaV2Alert(){
'msg': ""
})
}
export function showHypaV3Alert(){
alertStoreImported.set({
'type': 'hypaV3',
'msg': ""
})
}

View File

@@ -1,12 +1,12 @@
import { get, writable } from "svelte/store";
import { type character, type MessageGenerationInfo, type Chat, changeToPreset, setCurrentChat } from "../storage/database.svelte";
import { type character, type MessageGenerationInfo, type Chat, type MessagePresetInfo, changeToPreset, setCurrentChat } from "../storage/database.svelte";
import { DBState } from '../stores.svelte';
import { CharEmotion, selectedCharID } from "../stores.svelte";
import { ChatTokenizer, tokenize, tokenizeNum } from "../tokenizer";
import { language } from "../../lang";
import { alertError, alertToast } from "../alert";
import { loadLoreBookV3Prompt } from "./lorebook.svelte";
import { findCharacterbyId, getAuthorNoteDefaultText, getPersonaPrompt, getUserName, isLastCharPunctuation, trimUntilPunctuation } from "../util";
import { findCharacterbyId, getAuthorNoteDefaultText, getPersonaPrompt, getUserName, isLastCharPunctuation, trimUntilPunctuation, parseToggleSyntax } from "../util";
import { requestChatData } from "./request";
import { stableDiff } from "./stableDiff";
import { processScript, processScriptFull, risuChatParser } from "./scripts";
@@ -30,7 +30,7 @@ import { runLuaEditTrigger } from "./lua";
import { getGlobalChatVar, parseChatML } from "../parser.svelte";
import { getModelInfo, LLMFlags } from "../model/modellist";
import { hypaMemoryV3 } from "./memory/hypav3";
import { getModuleAssets } from "./modules";
import { getModuleAssets, getModuleToggles } from "./modules";
import { getFileSrc, readImage } from "../globalApi.svelte";
export interface OpenAIChat{
@@ -186,6 +186,39 @@ export async function sendChat(chatProcessIndex = -1,arg:{
return v
})
// ─────────────────────────────────────────────────────────
// Snapshot preset name & toggles before sending a message.
// Ensures correct metadata is recorded, even if presets
// change immediately after clicking "send".
//
// Used later in promptInfo assembly (e.g. promptInfo.promptText)
// ─────────────────────────────────────────────────────────
let promptInfo: MessagePresetInfo = {}
let initialPresetNameForPromptInfo = null
let initialPromptTogglesForPromptInfo: {
key: string,
value: string,
}[] = []
if(DBState.db.promptInfoInsideChat){
initialPresetNameForPromptInfo = DBState.db.botPresets[DBState.db.botPresetsId]?.name ?? ''
initialPromptTogglesForPromptInfo = parseToggleSyntax(DBState.db.customPromptTemplateToggle + getModuleToggles())
.flatMap(toggle => {
const raw = DBState.db.globalChatVariables[`toggle_${toggle.key}`]
if (toggle.type === 'select' || toggle.type === 'text') {
return [{ key: toggle.value, value: toggle.options[raw] }];
}
if (raw === '1') {
return [{ key: toggle.value, value: 'ON' }];
}
return [];
})
promptInfo = {
promptName: initialPresetNameForPromptInfo,
promptToggles: initialPromptTogglesForPromptInfo,
}
}
// ─────────────────────────────────────────────────────────────
let currentChar:character
let caculatedChatTokens = 0
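As a standalone sketch of the snapshot logic in the hunk above (the toggle shape is inferred from this diff, since `parseToggleSyntax` itself is not shown here):

```ts
// Inferred toggle shape: 'select'/'text' toggles carry an options lookup; everything else is on/off.
interface PromptToggle {
  key: string;                        // read from globalChatVariables[`toggle_${key}`]
  value: string;                      // label recorded in the message's promptToggles
  type?: "select" | "text" | string;
  options?: Record<string, string>;
}

// Mirrors the flatMap above: resolve each toggle's current value to a {key, value} pair,
// dropping boolean toggles that are off.
function snapshotToggles(
  toggles: PromptToggle[],
  chatVars: Record<string, string>
): { key: string; value: string }[] {
  return toggles.flatMap((toggle) => {
    const raw = chatVars[`toggle_${toggle.key}`];
    if (toggle.type === "select" || toggle.type === "text") {
      return [{ key: toggle.value, value: toggle.options?.[raw] ?? "" }];
    }
    return raw === "1" ? [{ key: toggle.value, value: "ON" }] : [];
  });
}
```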
@@ -367,13 +400,15 @@ export async function sendChat(chatProcessIndex = -1,arg:{
if(currentChat.note){
unformated.authorNote.push({
role: 'system',
content: risuChatParser(currentChat.note, {chara: currentChar})
content: risuChatParser(currentChat.note, {chara: currentChar}),
memo: 'authornote'
})
}
else if(getAuthorNoteDefaultText() !== ''){
unformated.authorNote.push({
role: 'system',
content: risuChatParser(getAuthorNoteDefaultText(), {chara: currentChar})
content: risuChatParser(getAuthorNoteDefaultText(), {chara: currentChar}),
memo: 'authornote'
})
}
@@ -403,7 +438,8 @@ export async function sendChat(chatProcessIndex = -1,arg:{
unformated.description.push({
role: 'system',
content: description
content: description,
memo: 'description',
})
if(nowChatroom.type === 'group'){
@@ -424,7 +460,8 @@ export async function sendChat(chatProcessIndex = -1,arg:{
for(const lorebook of normalActives){
unformated.lorebook.push({
role: lorebook.role,
content: risuChatParser(lorebook.prompt, {chara: currentChar})
content: risuChatParser(lorebook.prompt, {chara: currentChar}),
memo: 'lore',
})
}
@@ -448,7 +485,8 @@ export async function sendChat(chatProcessIndex = -1,arg:{
if(DBState.db.personaPrompt){
unformated.personaPrompt.push({
role: 'system',
content: risuChatParser(getPersonaPrompt(), {chara: currentChar})
content: risuChatParser(getPersonaPrompt(), {chara: currentChar}),
memo: 'persona',
})
}
@@ -473,7 +511,8 @@ export async function sendChat(chatProcessIndex = -1,arg:{
for(const lorebook of postEverythingLorebooks){
unformated.postEverything.push({
role: lorebook.role,
content: risuChatParser(lorebook.prompt, {chara: currentChar})
content: risuChatParser(lorebook.prompt, {chara: currentChar}),
memo: 'postEverything',
})
}
@@ -1061,6 +1100,12 @@ export async function sendChat(chatProcessIndex = -1,arg:{
}
}
type MemoType = 'persona' | 'description' | 'authornote' | 'supaMemory';
const promptBodyMap: Record<MemoType, string[]> = { persona: [], description: [], authornote: [], supaMemory: [] };
function pushPromptInfoBody(memo: MemoType, fmt: string) {
promptBodyMap[memo].push(risuChatParser(fmt));
}
if(promptTemplate){
const template = promptTemplate
@@ -1071,6 +1116,10 @@ export async function sendChat(chatProcessIndex = -1,arg:{
if(card.innerFormat && pmt.length > 0){
for(let i=0;i<pmt.length;i++){
pmt[i].content = risuChatParser(positionParser(card.innerFormat), {chara: currentChar}).replace('{{slot}}', pmt[i].content)
if(DBState.db.promptInfoInsideChat && DBState.db.promptTextInfoInsideChat){
pushPromptInfoBody(card.type, card.innerFormat)
}
}
}
@@ -1082,6 +1131,10 @@ export async function sendChat(chatProcessIndex = -1,arg:{
if(card.innerFormat && pmt.length > 0){
for(let i=0;i<pmt.length;i++){
pmt[i].content = risuChatParser(positionParser(card.innerFormat), {chara: currentChar}).replace('{{slot}}', pmt[i].content)
if(DBState.db.promptInfoInsideChat && DBState.db.promptTextInfoInsideChat){
pushPromptInfoBody(card.type, card.innerFormat)
}
}
}
@@ -1093,6 +1146,10 @@ export async function sendChat(chatProcessIndex = -1,arg:{
if(card.innerFormat && pmt.length > 0){
for(let i=0;i<pmt.length;i++){
pmt[i].content = risuChatParser(positionParser(card.innerFormat), {chara: currentChar}).replace('{{slot}}', pmt[i].content || card.defaultText || '')
if(DBState.db.promptInfoInsideChat && DBState.db.promptTextInfoInsideChat){
pushPromptInfoBody(card.type, card.innerFormat)
}
}
}
@@ -1208,6 +1265,10 @@ export async function sendChat(chatProcessIndex = -1,arg:{
if(card.innerFormat && pmt.length > 0){
for(let i=0;i<pmt.length;i++){
pmt[i].content = risuChatParser(card.innerFormat, {chara: currentChar}).replace('{{slot}}', pmt[i].content)
if(DBState.db.promptInfoInsideChat && DBState.db.promptTextInfoInsideChat){
pushPromptInfoBody('supaMemory', card.innerFormat)
}
}
}
@@ -1327,6 +1388,29 @@ export async function sendChat(chatProcessIndex = -1,arg:{
return true
}
function isPromptMemo(m: string): m is MemoType {
return ['persona', 'description', 'authornote', 'supaMemory'].includes(m);
}
if(DBState.db.promptInfoInsideChat && DBState.db.promptTextInfoInsideChat){
const promptBodyInfo: OpenAIChat[] = formated.flatMap(format => {
if (isPromptMemo(format.memo)) {
return promptBodyMap[format.memo].map(content => ({
role: format.role,
content,
}))
}
if (format.memo == null) {
return [format]
}
return []
})
promptInfo.promptText = promptBodyInfo
}
let result = ''
let emoChanged = false
let resendChat = false
@@ -1353,6 +1437,7 @@ export async function sendChat(chatProcessIndex = -1,arg:{
saying: currentChar.chaId,
time: Date.now(),
generationInfo,
promptInfo,
})
}
DBState.db.characters[selectedChar].chats[selectedChat].isStreaming = true
@@ -1432,7 +1517,8 @@ export async function sendChat(chatProcessIndex = -1,arg:{
data: result,
saying: currentChar.chaId,
time: Date.now(),
generationInfo
generationInfo,
promptInfo,
}
if(inlayResult.promise){
const p = await inlayResult.promise
@@ -1445,7 +1531,8 @@ export async function sendChat(chatProcessIndex = -1,arg:{
data: result,
saying: currentChar.chaId,
time: Date.now(),
generationInfo
generationInfo,
promptInfo,
})
const ind = DBState.db.characters[selectedChar].chats[selectedChat].message.length - 1
if(inlayResult.promise){

View File

@@ -15,6 +15,7 @@ import { Mutex } from "../mutex";
import { tokenize } from "../tokenizer";
import { fetchNative } from "../globalApi.svelte";
import { loadLoreBookV3Prompt } from './lorebook.svelte';
import { getPersonaPrompt, getUserName } from '../util';
let luaFactory:LuaFactory
let LuaSafeIds = new Set<string>()
@@ -461,6 +462,26 @@ export async function runLua(code:string, arg:{
return char.firstMessage
})
luaEngine.global.set('getPersonaName', (id:string) => {
if(!LuaSafeIds.has(id)){
return
}
return getUserName()
})
luaEngine.global.set('getPersonaDescription', (id:string) => {
if(!LuaSafeIds.has(id)){
return
}
const db = getDatabase()
const selectedChar = get(selectedCharID)
const char = db.characters[selectedChar]
return risuChatParser(getPersonaPrompt(), { chara: char })
})
luaEngine.global.set('getBackgroundEmbedding', async (id:string) => {
if(!LuaSafeIds.has(id)){
return

View File

@@ -1,27 +1,33 @@
import localforage from "localforage";
import { globalFetch } from "src/ts/globalApi.svelte";
import { runEmbedding } from "../transformers";
import { alertError } from "src/ts/alert";
import { appendLastPath } from "src/ts/util";
import { getDatabase } from "src/ts/storage/database.svelte";
export type HypaModel = 'custom'|'ada'|'openai3small'|'openai3large'|'MiniLM'|'MiniLMGPU'|'nomic'|'nomicGPU'|'bgeSmallEn'|'bgeSmallEnGPU'|'bgem3'|'bgem3GPU'|'multiMiniLM'|'multiMiniLMGPU'
export type HypaModel = 'ada'|'MiniLM'|'nomic'|'custom'|'nomicGPU'|'bgeSmallEn'|'bgeSmallEnGPU'|'bgem3'|'bgem3GPU'|'openai3small'|'openai3large'
const localModels = {
// bge-m3 is a heavy model for typical consumer hardware.
// If your GPU can't handle it, you'll see the error below:
// Failed to execute 'mapAsync' on 'GPUBuffer': [Device] is lost
export const localModels = {
models: {
'MiniLM':'Xenova/all-MiniLM-L6-v2',
'MiniLMGPU': "Xenova/all-MiniLM-L6-v2",
'nomic':'nomic-ai/nomic-embed-text-v1.5',
'nomicGPU':'nomic-ai/nomic-embed-text-v1.5',
'bgeSmallEn': 'BAAI/bge-small-en-v1.5',
'bgeSmallEnGPU': 'BAAI/bge-small-en-v1.5',
'bgem3': 'BAAI/bge-m3',
'bgem3GPU': 'BAAI/bge-m3',
'bgeSmallEn': 'Xenova/bge-small-en-v1.5',
'bgeSmallEnGPU': 'Xenova/bge-small-en-v1.5',
'bgem3': 'Xenova/bge-m3',
'bgem3GPU': 'Xenova/bge-m3',
'multiMiniLM': 'Xenova/paraphrase-multilingual-MiniLM-L12-v2',
'multiMiniLMGPU': 'Xenova/paraphrase-multilingual-MiniLM-L12-v2',
},
gpuModels:[
'MiniLMGPU',
'nomicGPU',
'bgeSmallEnGPU',
'bgem3GPU'
'bgem3GPU',
'multiMiniLMGPU',
]
}
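Both the CPU and GPU variants of each entry now point at the same Xenova repo; which backend runs is decided by membership in `gpuModels`. A minimal sketch of how a caller can resolve a `HypaModel` name (mirroring `getLocalEmbeds` in the new processor further down; the helper name is illustrative):

```ts
import { localModels, type HypaModel } from "./hypamemory";

// Illustrative helper: map a local HypaModel name to its Hugging Face repo and execution device.
function resolveLocalModel(model: HypaModel): { repo: string; device: "webgpu" | "wasm" } | null {
  const repo = (localModels.models as Record<string, string>)[model];
  if (!repo) return null; // not a local model — handled via an embeddings API instead
  const device = localModels.gpuModels.includes(model) ? "webgpu" : "wasm";
  return { repo, device };
}

// resolveLocalModel("multiMiniLMGPU")
//   → { repo: "Xenova/paraphrase-multilingual-MiniLM-L12-v2", device: "webgpu" }
```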
@@ -44,7 +50,7 @@ export class HypaProcesser{
else{
this.model = model
}
this.customEmbeddingUrl = customEmbeddingUrl || db.hypaCustomSettings.url
this.customEmbeddingUrl = customEmbeddingUrl?.trim() || db.hypaCustomSettings?.url?.trim() || ""
}
async embedDocuments(texts: string[]): Promise<VectorArray[]> {
@@ -80,10 +86,12 @@ export class HypaProcesser{
const db = getDatabase()
const fetchArgs = {
...(db.hypaCustomSettings.key ? {headers: {"Authorization": "Bearer " + db.hypaCustomSettings.key}} : {}),
headers: {
...(db.hypaCustomSettings?.key?.trim() ? {"Authorization": "Bearer " + db.hypaCustomSettings.key.trim()} : {})
},
body: {
"input": input,
...(db.hypaCustomSettings.model ? {"model": db.hypaCustomSettings.model} : {})
...(db.hypaCustomSettings?.model?.trim() ? {"model": db.hypaCustomSettings.model.trim()} : {})
}
};
@@ -99,7 +107,7 @@ export class HypaProcesser{
gf = await globalFetch("https://api.openai.com/v1/embeddings", {
headers: {
"Authorization": "Bearer " + db.supaMemoryKey || this.oaikey
"Authorization": "Bearer " + (this.oaikey?.trim() || db.supaMemoryKey?.trim())
},
body: {
"input": input,
@@ -134,7 +142,7 @@ export class HypaProcesser{
async addText(texts:string[]) {
const db = getDatabase()
const suffix = (this.model === 'custom' && db.hypaCustomSettings.model) ? `-${db.hypaCustomSettings.model}` : ""
const suffix = (this.model === 'custom' && db.hypaCustomSettings?.model?.trim()) ? `-${db.hypaCustomSettings.model.trim()}` : ""
for(let i=0;i<texts.length;i++){
const itm:memoryVector = await this.forage.getItem(texts[i] + '|' + this.model + suffix)
@@ -205,7 +213,8 @@ export class HypaProcesser{
return similarity(query1, query2)
}
}
function similarity(a:VectorArray, b:VectorArray) {
export function similarity(a:VectorArray, b:VectorArray) {
let dot = 0;
for(let i=0;i<a.length;i++){
dot += a[i] * b[i]
@@ -227,4 +236,4 @@ const chunkArray = <T>(arr: T[], chunkSize: number) =>
const chunk = chunks[chunkIndex] || [];
chunks[chunkIndex] = chunk.concat([elem]);
return chunks;
}, [] as T[][]);
}, [] as T[][]);

View File

@@ -0,0 +1,414 @@
import localforage from "localforage";
import { type HypaModel, localModels } from "./hypamemory";
import { TaskRateLimiter, TaskCanceledError } from "./taskRateLimiter";
import { runEmbedding } from "../transformers";
import { globalFetch } from "src/ts/globalApi.svelte";
import { getDatabase } from "src/ts/storage/database.svelte";
import { appendLastPath } from "src/ts/util";
export interface HypaProcessorV2Options {
model?: HypaModel;
customEmbeddingUrl?: string;
oaiKey?: string;
rateLimiter?: TaskRateLimiter;
}
export interface EmbeddingText<TMetadata> {
content: string;
metadata?: TMetadata;
}
export interface EmbeddingResult<TMetadata> extends EmbeddingText<TMetadata> {
embedding: EmbeddingVector;
}
export type EmbeddingVector = number[] | Float32Array;
export class HypaProcessorV2<TMetadata> {
private static readonly LOG_PREFIX = "[HypaProcessorV2]";
public readonly options: HypaProcessorV2Options;
public progressCallback: (queuedCount: number) => void = null;
private vectors: Map<string, EmbeddingResult<TMetadata>> = new Map();
private forage: LocalForage = localforage.createInstance({
name: "hypaVector",
});
public constructor(options?: HypaProcessorV2Options) {
const db = getDatabase();
this.options = {
model: db.hypaModel || "MiniLM",
customEmbeddingUrl: db.hypaCustomSettings?.url?.trim() || "",
oaiKey: db.supaMemoryKey?.trim() || "",
rateLimiter: new TaskRateLimiter(),
...options,
};
}
public async addTexts(ebdTexts: EmbeddingText<TMetadata>[]): Promise<void> {
await this.getEmbeds(ebdTexts, true);
}
public async similaritySearchScored(
query: string
): Promise<[EmbeddingResult<TMetadata>, number][]> {
const results = await this.similaritySearchScoredBatch([query]);
return results[0];
}
public async similaritySearchScoredBatch(
queries: string[]
): Promise<[EmbeddingResult<TMetadata>, number][][]> {
if (queries.length === 0) {
return [];
}
// Remove duplicate queries
const uniqueQueries = [...new Set(queries)];
// Convert queries to EmbeddingText array
const ebdTexts: EmbeddingText<TMetadata>[] = uniqueQueries.map((query) => ({
content: query,
}));
// Get query embeddings (don't save to memory)
const ebdResults = await this.getEmbeds(ebdTexts, false);
const scoredResultsMap = new Map<
string,
[EmbeddingResult<TMetadata>, number][]
>();
// Calculate similarity for each unique query
for (let i = 0; i < uniqueQueries.length; i++) {
const ebdResult = ebdResults[i];
const scoredVectors = Array.from(this.vectors.values())
.map((vector): [EmbeddingResult<TMetadata>, number] => [
vector,
this.similarity(ebdResult.embedding, vector.embedding),
])
.sort((a, b) => b[1] - a[1]);
scoredResultsMap.set(uniqueQueries[i], scoredVectors);
}
return queries.map((query) => scoredResultsMap.get(query));
}
private async getEmbeds(
ebdTexts: EmbeddingText<TMetadata>[],
saveToMemory: boolean = true
): Promise<EmbeddingResult<TMetadata>[]> {
if (ebdTexts.length === 0) {
return [];
}
const resultMap: Map<string, EmbeddingResult<TMetadata>> = new Map();
const toEmbed: EmbeddingText<TMetadata>[] = [];
// Load cache
const loadPromises = ebdTexts.map(async (item, index) => {
const { content, metadata } = item;
// Use if already in memory
if (this.vectors.has(content)) {
resultMap.set(content, this.vectors.get(content));
return;
}
try {
const cached = await this.forage.getItem<EmbeddingResult<TMetadata>>(
this.getCacheKey(content)
);
if (cached) {
// Debug log for cache hit
console.debug(
HypaProcessorV2.LOG_PREFIX,
`Cache hit for getting embedding ${index} with model ${this.options.model}`
);
// Add metadata
cached.metadata = metadata;
// Save to memory
if (saveToMemory) {
this.vectors.set(content, cached);
}
resultMap.set(content, cached);
} else {
toEmbed.push(item);
}
} catch (error) {
toEmbed.push(item);
}
});
await Promise.all(loadPromises);
if (toEmbed.length === 0) {
return ebdTexts.map((item) => resultMap.get(item.content));
}
// Chunking array
const chunkSize = await this.getOptimalChunkSize();
// Debug log for optimal chunk size
console.debug(
HypaProcessorV2.LOG_PREFIX,
`Optimal chunk size for ${this.options.model}: ${chunkSize}`
);
const chunks = this.chunkArray(toEmbed, chunkSize);
if (this.isLocalModel()) {
// Local model: Sequential processing
for (let i = 0; i < chunks.length; i++) {
// Progress callback
this.progressCallback?.(chunks.length - i - 1);
const chunk = chunks[i];
const embeddings = await this.getLocalEmbeds(
chunk.map((item) => item.content)
);
const savePromises = embeddings.map(async (embedding, j) => {
const { content, metadata } = chunk[j];
const ebdResult: EmbeddingResult<TMetadata> = {
content,
embedding,
metadata,
};
// Save to DB
await this.forage.setItem(this.getCacheKey(content), {
content,
embedding,
});
// Save to memory
if (saveToMemory) {
this.vectors.set(content, ebdResult);
}
resultMap.set(content, ebdResult);
});
await Promise.all(savePromises);
}
} else {
// API model: Parallel processing
const embeddingTasks = chunks.map((chunk) => {
const contents = chunk.map((item) => item.content);
return () => this.getAPIEmbeds(contents);
});
// Progress callback
this.options.rateLimiter.taskQueueChangeCallback = this.progressCallback;
const batchResult = await this.options.rateLimiter.executeBatch<
EmbeddingVector[]
>(embeddingTasks);
const errors: Error[] = [];
const chunksSavePromises = batchResult.results.map(async (result, i) => {
if (!result.success) {
errors.push(result.error);
return;
}
if (!result.data) {
errors.push(new Error("No embeddings found in the response."));
return;
}
const chunk = chunks[i];
const savePromises = result.data.map(async (embedding, j) => {
const { content, metadata } = chunk[j];
const ebdResult: EmbeddingResult<TMetadata> = {
content,
embedding,
metadata,
};
// Save to DB
await this.forage.setItem(this.getCacheKey(content), {
content,
embedding,
});
// Save to memory
if (saveToMemory) {
this.vectors.set(content, ebdResult);
}
resultMap.set(content, ebdResult);
});
await Promise.all(savePromises);
});
await Promise.all(chunksSavePromises);
// Throw major error if there are errors
if (errors.length > 0) {
const majorError =
errors.find((error) => !(error instanceof TaskCanceledError)) ||
errors[0];
throw majorError;
}
}
return ebdTexts.map((item) => resultMap.get(item.content));
}
private similarity(a: EmbeddingVector, b: EmbeddingVector): number {
let dot = 0;
let magA = 0;
let magB = 0;
for (let i = 0; i < a.length; i++) {
dot += a[i] * b[i];
magA += a[i] * a[i];
magB += b[i] * b[i];
}
return dot / (Math.sqrt(magA) * Math.sqrt(magB));
}
private getCacheKey(content: string): string {
const db = getDatabase();
const suffix =
this.options.model === "custom" && db.hypaCustomSettings?.model?.trim()
? `-${db.hypaCustomSettings.model.trim()}`
: "";
return `${content}|${this.options.model}${suffix}`;
}
private async getOptimalChunkSize(): Promise<number> {
// API
if (!this.isLocalModel()) {
return 50;
}
const isMobile = /Android|iPhone|iPad|iPod|webOS/i.test(
navigator.userAgent
);
// WebGPU
if ("gpu" in navigator) {
return isMobile ? 5 : 10;
}
// WASM
const cpuCores = navigator.hardwareConcurrency || 4;
const baseChunkSize = isMobile ? Math.floor(cpuCores / 2) : cpuCores;
return Math.min(baseChunkSize, 10);
}
private isLocalModel(): boolean {
return Object.keys(localModels.models).includes(this.options.model);
}
private chunkArray<T>(array: T[], size: number): T[][] {
const chunks: T[][] = [];
for (let i = 0; i < array.length; i += size) {
chunks.push(array.slice(i, i + size));
}
return chunks;
}
private async getLocalEmbeds(contents: string[]): Promise<EmbeddingVector[]> {
const results: Float32Array[] = await runEmbedding(
contents,
localModels.models[this.options.model],
localModels.gpuModels.includes(this.options.model) ? "webgpu" : "wasm"
);
return results;
}
private async getAPIEmbeds(contents: string[]): Promise<EmbeddingVector[]> {
const db = getDatabase();
let response = null;
if (this.options.model === "custom") {
if (!this.options.customEmbeddingUrl) {
throw new Error("Custom model requires a Custom Server URL");
}
const replaceUrl = this.options.customEmbeddingUrl.endsWith("/embeddings")
? this.options.customEmbeddingUrl
: appendLastPath(this.options.customEmbeddingUrl, "embeddings");
const fetchArgs = {
headers: {
...(db.hypaCustomSettings?.key?.trim()
? { Authorization: "Bearer " + db.hypaCustomSettings.key.trim() }
: {}),
},
body: {
input: contents,
...(db.hypaCustomSettings?.model?.trim()
? { model: db.hypaCustomSettings.model.trim() }
: {}),
},
};
response = await globalFetch(replaceUrl, fetchArgs);
} else if (
["ada", "openai3small", "openai3large"].includes(this.options.model)
) {
const models = {
ada: "text-embedding-ada-002",
openai3small: "text-embedding-3-small",
openai3large: "text-embedding-3-large",
};
const fetchArgs = {
headers: {
Authorization:
"Bearer " +
(this.options.oaiKey?.trim() || db.supaMemoryKey?.trim()),
},
body: {
input: contents,
model: models[this.options.model],
},
};
response = await globalFetch(
"https://api.openai.com/v1/embeddings",
fetchArgs
);
} else {
throw new Error(`Unsupported model: ${this.options.model}`);
}
if (!response.ok || !response.data.data) {
throw new Error(JSON.stringify(response.data));
}
const embeddings: EmbeddingVector[] = response.data.data.map(
(item: { embedding: EmbeddingVector }) => {
if (!item.embedding) {
throw new Error("No embeddings found in the response.");
}
return item.embedding;
}
);
return embeddings;
}
}
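A hedged usage sketch of the new processor (the import path and the metadata shape are assumptions; the diff does not show the new file's name):

```ts
// Illustrative only — the path and SummaryMeta are assumptions; the calls mirror the class above.
import { HypaProcessorV2 } from "./hypav2processor";

interface SummaryMeta { chatIndex: number }

async function rankSummaries(
  summaries: { text: string; chatIndex: number }[],
  query: string
) {
  const processor = new HypaProcessorV2<SummaryMeta>(); // defaults (model, keys, rate limiter) come from the database
  processor.progressCallback = (queued) => console.debug(`${queued} embedding chunks still queued`);

  // Embed and cache the summaries (persisted in the "hypaVector" localforage store).
  await processor.addTexts(
    summaries.map((s) => ({ content: s.text, metadata: { chatIndex: s.chatIndex } }))
  );

  // Score everything held in memory against the query, best match first.
  const scored = await processor.similaritySearchScored(query);
  return scored.slice(0, 5).map(([result, score]) => ({
    score,
    chatIndex: result.metadata?.chatIndex,
    text: result.content,
  }));
}
```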

File diff suppressed because it is too large

View File

@@ -0,0 +1,188 @@
export interface TaskRateLimiterOptions {
tasksPerMinute?: number;
maxConcurrentTasks?: number;
failFast?: boolean;
}
export interface BatchResult<TData> {
results: TaskResult<TData>[];
successCount: number;
failureCount: number;
allSucceeded: boolean;
}
export interface TaskResult<TData> {
success: boolean;
data?: TData;
error?: Error;
}
export class TaskRateLimiter {
private static readonly LOG_PREFIX = "[TaskRateLimiter]";
public readonly options: TaskRateLimiterOptions;
public taskQueueChangeCallback: (queuedCount: number) => void = null;
private timestamps: number[] = [];
private active: number = 0;
private queue: Array<{
task: () => Promise<TaskResult<any>>;
resolve: (result: TaskResult<any>) => void;
}> = [];
public constructor(options?: TaskRateLimiterOptions) {
this.options = {
tasksPerMinute: 20,
maxConcurrentTasks: 5,
failFast: true,
...options,
};
if (this.options.maxConcurrentTasks > this.options.tasksPerMinute) {
throw new Error("maxConcurrentTasks must be less than tasksPerMinute");
}
}
public async executeTask<TData>(
task: () => Promise<TData>
): Promise<TaskResult<TData>> {
return new Promise<TaskResult<TData>>((resolve) => {
this.queue.push({
task: async () => {
try {
const data = await task();
return { success: true, data };
} catch (error) {
return { success: false, error };
}
},
resolve,
});
this.taskQueueChangeCallback?.(this.queue.length);
this.processNextFromQueue();
});
}
public async executeBatch<TData>(
tasks: Array<() => Promise<TData>>
): Promise<BatchResult<TData>> {
const taskResults = await Promise.all(
tasks.map((task) => this.executeTask(task))
);
const successCount = taskResults.filter((r) => r.success).length;
const failureCount = taskResults.length - successCount;
return {
results: taskResults,
successCount,
failureCount,
allSucceeded: failureCount === 0,
};
}
public cancelPendingTasks(reason: string): void {
const error = new TaskCanceledError(reason);
while (this.queue.length > 0) {
const { resolve } = this.queue.shift();
resolve({ success: false, error });
}
this.taskQueueChangeCallback?.(this.queue.length);
}
public get queuedTaskCount(): number {
return this.queue.length;
}
private processNextFromQueue(): void {
if (this.queue.length === 0) return;
if (this.active >= this.options.maxConcurrentTasks) {
// Debug log for concurrency limit hit
console.debug(
TaskRateLimiter.LOG_PREFIX,
"Concurrency limit hit:",
"\nTasks in last minute:",
this.timestamps.length + "/" + this.options.tasksPerMinute,
"\nActive tasks:",
this.active + "/" + this.options.maxConcurrentTasks,
"\nWaiting tasks in queue:",
this.queue.length
);
return;
}
this.timestamps = this.timestamps.filter(
(ts) => Date.now() - ts <= 60 * 1000
);
if (this.timestamps.length >= this.options.tasksPerMinute) {
const oldestTimestamp = Math.min(...this.timestamps);
const timeUntilExpiry = Math.max(
100,
60 * 1000 - (Date.now() - oldestTimestamp)
);
// Debug log for rate limit hit
console.debug(
TaskRateLimiter.LOG_PREFIX,
"Rate limit hit:",
"\nTasks in last minute:",
this.timestamps.length + "/" + this.options.tasksPerMinute,
"\nActive tasks:",
this.active + "/" + this.options.maxConcurrentTasks,
"\nWaiting tasks in queue:",
this.queue.length,
"\nWill retry in:",
timeUntilExpiry + "ms"
);
// Wait until rate limit window advances before retrying
setTimeout(() => this.processNextFromQueue(), timeUntilExpiry);
return;
}
const { task, resolve } = this.queue.shift();
this.active++;
this.taskQueueChangeCallback?.(this.queue.length);
this.timestamps.push(Date.now());
// Debug log for task start
console.debug(
TaskRateLimiter.LOG_PREFIX,
"Task started:",
"\nTasks in last minute:",
this.timestamps.length + "/" + this.options.tasksPerMinute,
"\nActive tasks:",
this.active + "/" + this.options.maxConcurrentTasks,
"\nWaiting tasks in queue:",
this.queue.length
);
task()
.then((result) => {
resolve(result);
if (!result.success && this.options.failFast) {
this.cancelPendingTasks("Task canceled due to previous failure");
}
})
.finally(() => {
this.active--;
// Prevents call stack overflow while maintaining concurrency limits
queueMicrotask(() => this.processNextFromQueue());
});
}
}
export class TaskCanceledError extends Error {
public readonly name: string;
public constructor(message: string) {
super(message);
this.name = "TaskCanceledError";
}
}
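A short usage sketch of the limiter as the embedding code above consumes it (the `embed` callback is a stand-in for an API call):

```ts
import { TaskRateLimiter, TaskCanceledError } from "./taskRateLimiter";

// Illustrative: run many embedding requests under the default limits
// (20 tasks/minute, 5 concurrent, fail-fast cancellation of the rest on first failure).
async function embedAllChunks(
  chunks: string[][],
  embed: (chunk: string[]) => Promise<number[][]>
): Promise<number[][][]> {
  const limiter = new TaskRateLimiter({ tasksPerMinute: 20, maxConcurrentTasks: 5, failFast: true });
  limiter.taskQueueChangeCallback = (queued) => console.debug(`${queued} chunks waiting`);

  const batch = await limiter.executeBatch(chunks.map((chunk) => () => embed(chunk)));

  if (!batch.allSucceeded) {
    // Surface the first real failure; TaskCanceledError entries are just collateral cancellations.
    const fatal = batch.results.find((r) => !r.success && !(r.error instanceof TaskCanceledError));
    throw (fatal ?? batch.results.find((r) => !r.success))!.error;
  }
  return batch.results.map((r) => r.data!);
}
```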

View File

@@ -60,12 +60,19 @@ export const runEmbedding = async (texts: string[], model:EmbeddingModel = 'Xeno
console.log('running embedding')
let embeddingModelQuery = model + device
if(!extractor || embeddingModelQuery !== lastEmbeddingModelQuery){
// Dispose old extractor
if(extractor) {
await extractor.dispose()
}
extractor = await pipeline('feature-extraction', model, {
// The webgpu default dtype is fp32, so explicitly request q8 — the default dtype used under wasm.
...(device === 'webgpu' ? { dtype: "q8" } : {}),
device: device,
progress_callback: (progress) => {
console.log(progress)
}
});
lastEmbeddingModelQuery = embeddingModelQuery
console.log('extractor loaded')
}
let result = await extractor(texts, { pooling: 'mean', normalize: true });
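A small direct-call sketch (the third `device` argument is inferred from the `getLocalEmbeds` call earlier in this diff); note that the pipeline is rebuilt, and the old extractor disposed, only when the model+device pair changes:

```ts
import { runEmbedding } from "../transformers"; // same import path used elsewhere in this diff

async function demo(): Promise<void> {
  const vectors = await runEmbedding(
    ["hello world", "good morning"],
    "Xenova/all-MiniLM-L6-v2",
    "wasm" // or "webgpu" where supported
  );
  console.log(vectors.length, vectors[0].length); // 2 embeddings, one Float32Array each
}
```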

src/ts/process/webllm.ts (new file, 60 lines)
View File

@@ -0,0 +1,60 @@
import {
type ChatCompletionMessageParam,
type ChatCompletionRequestNonStreaming,
MLCEngine,
CreateMLCEngine,
} from "@mlc-ai/web-llm";
let engine: MLCEngine = null;
let lastModel: string = null;
export async function chatCompletion(
messages: { role: string; content: string }[],
model: string,
config: Record<string, any>
): Promise<string> {
try {
if (!engine || lastModel !== model) {
if (engine) engine.unload();
const initProgressCallback = (progress) => {
console.log("[WebLLM]", progress);
};
engine = await CreateMLCEngine(
model,
{
initProgressCallback,
},
{ context_window_size: 16384 }
);
lastModel = model;
}
const request: ChatCompletionRequestNonStreaming = {
messages: messages as ChatCompletionMessageParam[],
temperature: 0,
max_tokens: 4096,
...config,
};
const completion = await engine.chat.completions.create(request);
const content = completion.choices[0].message.content;
return content;
} catch (error) {
if (error instanceof Error) {
throw error;
}
throw new Error(JSON.stringify(error));
}
}
export async function unloadEngine(): Promise<void> {
if (!engine) return;
await engine.unload();
engine = null;
lastModel = null;
}
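An illustrative caller for the new module (the model id below is an assumption taken from web-llm's prebuilt catalog, not from this repo):

```ts
import { chatCompletion, unloadEngine } from "./webllm";

async function summarizeLocally(text: string): Promise<string> {
  const reply = await chatCompletion(
    [
      { role: "system", content: "Summarize the user's text in two sentences." },
      { role: "user", content: text },
    ],
    "Llama-3.2-1B-Instruct-q4f16_1-MLC", // assumed prebuilt model id
    { temperature: 0.2 }                 // overrides the defaults spread into the request
  );
  await unloadEngine(); // free GPU memory once done
  return reply;
}
```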

View File

@@ -11,6 +11,7 @@ import { prebuiltNAIpresets, prebuiltPresets } from '../process/templates/templa
import { defaultColorScheme, type ColorScheme } from '../gui/colorscheme';
import type { PromptItem, PromptSettings } from '../process/prompt';
import type { OobaChatCompletionRequestParams } from '../model/ooba';
import { type HypaV3Settings, type HypaV3Preset, createHypaV3Preset } from '../process/memory/hypav3'
export let appVer = "159.0.0"
export let webAppSubVer = ''
@@ -515,17 +516,21 @@ export function setDatabase(data:Database){
data.checkCorruption ??= true
data.OaiCompAPIKeys ??= {}
data.reasoningEffort ??= 0
data.hypaV3Settings = {
memoryTokensRatio: data.hypaV3Settings?.memoryTokensRatio ?? 0.2,
extraSummarizationRatio: data.hypaV3Settings?.extraSummarizationRatio ?? 0,
maxChatsPerSummary: data.hypaV3Settings?.maxChatsPerSummary ?? 4,
recentMemoryRatio: data.hypaV3Settings?.recentMemoryRatio ?? 0.4,
similarMemoryRatio: data.hypaV3Settings?.similarMemoryRatio ?? 0.4,
enableSimilarityCorrection: data.hypaV3Settings?.enableSimilarityCorrection ?? false,
preserveOrphanedMemory: data.hypaV3Settings?.preserveOrphanedMemory ?? false,
processRegexScript: data.hypaV3Settings?.processRegexScript ?? false,
doNotSummarizeUserMessage: data.hypaV3Settings?.doNotSummarizeUserMessage ?? false
data.hypaV3Presets ??= [
createHypaV3Preset("Default", {
summarizationPrompt: data.supaMemoryPrompt ? data.supaMemoryPrompt : "",
...data.hypaV3Settings
})
]
if (data.hypaV3Presets.length > 0) {
data.hypaV3Presets = data.hypaV3Presets.map((preset, i) =>
createHypaV3Preset(
preset.name || `Preset ${i + 1}`,
preset.settings || {}
)
)
}
data.hypaV3PresetId ??= 0
data.returnCSSError ??= true
data.useExperimentalGoogleTranslator ??= false
if(data.antiClaudeOverload){ //migration
@@ -535,7 +540,7 @@ export function setDatabase(data:Database){
data.hypaCustomSettings = {
url: data.hypaCustomSettings?.url ?? "",
key: data.hypaCustomSettings?.key ?? "",
model: data.hypaCustomSettings?.model ?? "",
model: data.hypaCustomSettings?.model ?? ""
}
data.doNotChangeSeperateModels ??= false
data.modelTools ??= []
@@ -960,17 +965,10 @@ export interface Database{
showPromptComparison:boolean
checkCorruption:boolean
hypaV3:boolean
hypaV3Settings: {
memoryTokensRatio: number
extraSummarizationRatio: number
maxChatsPerSummary: number
recentMemoryRatio: number
similarMemoryRatio: number
enableSimilarityCorrection: boolean
preserveOrphanedMemory: boolean
processRegexScript: boolean
doNotSummarizeUserMessage: boolean
}
hypaV3Settings: HypaV3Settings // legacy
hypaV3Presets: HypaV3Preset[]
hypaV3PresetId: number
showMenuHypaMemoryModal:boolean
OaiCompAPIKeys: {[key:string]:string}
inlayErrorResponse:boolean
reasoningEffort:number
@@ -1026,6 +1024,8 @@ export interface Database{
igpPrompt:string
useTokenizerCaching:boolean
showMenuHypaMemoryModal:boolean
promptInfoInsideChat:boolean
promptTextInfoInsideChat:boolean
}
interface SeparateParameters{
@@ -1504,6 +1504,7 @@ export interface Message{
chatId?:string
time?: number
generationInfo?: MessageGenerationInfo
promptInfo?: MessagePresetInfo
name?:string
otherUser?:boolean
}
@@ -1516,6 +1517,12 @@ export interface MessageGenerationInfo{
maxContext?: number
}
export interface MessagePresetInfo{
promptName?: string,
promptToggles?: {key: string, value: string}[],
promptText?: OpenAIChat[],
}
interface AINsettings{
top_p: number,
rep_pen: number,
@@ -1882,6 +1889,7 @@ import type { Parameter } from '../process/request';
import type { HypaModel } from '../process/memory/hypamemory';
import type { SerializableHypaV3Data } from '../process/memory/hypav3';
import { defaultHotkeys, type Hotkey } from '../defaulthotkeys';
import type { OpenAIChat } from '../process/index.svelte';
export async function downloadPreset(id:number, type:'json'|'risupreset'|'return' = 'json'){
saveCurrentPreset()

View File

@@ -50,6 +50,13 @@ export const alertStore = writable({
type: 'none',
msg: 'n',
} as alertData)
export const hypaV3ModalOpen = writable(false)
export const hypaV3ProgressStore = writable({
open: false,
miniMsg: '',
msg: '',
subMsg: '',
})
export const selIdState = $state({
selId: -1
})
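A minimal sketch of driving the new progress modal from a long-running memory pass (the import path is an assumption):

```ts
import { hypaV3ProgressStore } from "src/ts/stores.svelte"; // path assumed

// Open the modal and update it as work progresses…
hypaV3ProgressStore.set({
  open: true,
  miniMsg: "3/12",
  msg: "Summarizing chat history",
  subMsg: "Waiting for rate limiter…",
});

// …and close it when the pass finishes.
hypaV3ProgressStore.update((s) => ({ ...s, open: false }));
```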