Merge branch 'main' into lualore

kwaroran
2025-05-17 01:09:37 +09:00
committed by GitHub
24 changed files with 2573 additions and 682 deletions

View File

@@ -10,7 +10,7 @@ export interface alertData{
type: 'error'|'normal'|'none'|'ask'|'wait'|'selectChar'
|'input'|'toast'|'wait2'|'markdown'|'select'|'login'
|'tos'|'cardexport'|'requestdata'|'addchar'|'hypaV2'|'selectModule'
|'chatOptions'|'pukmakkurit'|'branches'|'hypaV3'|'progress',
|'chatOptions'|'pukmakkurit'|'branches'|'progress',
msg: string,
submsg?: string
}
@@ -319,10 +319,3 @@ export function showHypaV2Alert(){
'msg': ""
})
}
export function showHypaV3Alert(){
alertStoreImported.set({
'type': 'hypaV3',
'msg': ""
})
}

View File

@@ -1,12 +1,12 @@
import { get, writable } from "svelte/store";
import { type character, type MessageGenerationInfo, type Chat, changeToPreset, setCurrentChat } from "../storage/database.svelte";
import { type character, type MessageGenerationInfo, type Chat, type MessagePresetInfo, changeToPreset, setCurrentChat } from "../storage/database.svelte";
import { DBState } from '../stores.svelte';
import { CharEmotion, selectedCharID } from "../stores.svelte";
import { ChatTokenizer, tokenize, tokenizeNum } from "../tokenizer";
import { language } from "../../lang";
import { alertError, alertToast } from "../alert";
import { loadLoreBookV3Prompt } from "./lorebook.svelte";
import { findCharacterbyId, getAuthorNoteDefaultText, getPersonaPrompt, getUserName, isLastCharPunctuation, trimUntilPunctuation } from "../util";
import { findCharacterbyId, getAuthorNoteDefaultText, getPersonaPrompt, getUserName, isLastCharPunctuation, trimUntilPunctuation, parseToggleSyntax } from "../util";
import { requestChatData } from "./request";
import { stableDiff } from "./stableDiff";
import { processScript, processScriptFull, risuChatParser } from "./scripts";
@@ -30,7 +30,7 @@ import { runLuaEditTrigger } from "./lua";
import { getGlobalChatVar, parseChatML } from "../parser.svelte";
import { getModelInfo, LLMFlags } from "../model/modellist";
import { hypaMemoryV3 } from "./memory/hypav3";
import { getModuleAssets } from "./modules";
import { getModuleAssets, getModuleToggles } from "./modules";
import { getFileSrc, readImage } from "../globalApi.svelte";
export interface OpenAIChat{
@@ -186,6 +186,39 @@ export async function sendChat(chatProcessIndex = -1,arg:{
return v
})
// ─────────────────────────────────────────────────────────
// Snapshot preset name & toggles before sending a message.
// Ensures correct metadata is recorded, even if presets
// change immediately after clicking "send".
//
// Used later in promptInfo assembly (e.g. promptInfo.promptText)
// ─────────────────────────────────────────────────────────
let promptInfo: MessagePresetInfo = {}
let initialPresetNameForPromptInfo = null
let initialPromptTogglesForPromptInfo: {
key: string,
value: string,
}[] = []
if(DBState.db.promptInfoInsideChat){
initialPresetNameForPromptInfo = DBState.db.botPresets[DBState.db.botPresetsId]?.name ?? ''
initialPromptTogglesForPromptInfo = parseToggleSyntax(DBState.db.customPromptTemplateToggle + getModuleToggles())
.flatMap(toggle => {
const raw = DBState.db.globalChatVariables[`toggle_${toggle.key}`]
if (toggle.type === 'select' || toggle.type === 'text') {
return [{ key: toggle.value, value: toggle.options[raw] }];
}
if (raw === '1') {
return [{ key: toggle.value, value: 'ON' }];
}
return [];
})
promptInfo = {
promptName: initialPresetNameForPromptInfo,
promptToggles: initialPromptTogglesForPromptInfo,
}
}
// ─────────────────────────────────────────────────────────────
let currentChar:character
let caculatedChatTokens = 0
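As a standalone sketch of the snapshot logic in the hunk above (the toggle shape is inferred from this diff, since `parseToggleSyntax` itself is not shown here):

```ts
// Inferred toggle shape: 'select'/'text' toggles carry an options lookup; everything else is on/off.
interface PromptToggle {
  key: string;                        // read from globalChatVariables[`toggle_${key}`]
  value: string;                      // label recorded in the message's promptToggles
  type?: "select" | "text" | string;
  options?: Record<string, string>;
}

// Mirrors the flatMap above: resolve each toggle's current value to a {key, value} pair,
// dropping boolean toggles that are off.
function snapshotToggles(
  toggles: PromptToggle[],
  chatVars: Record<string, string>
): { key: string; value: string }[] {
  return toggles.flatMap((toggle) => {
    const raw = chatVars[`toggle_${toggle.key}`];
    if (toggle.type === "select" || toggle.type === "text") {
      return [{ key: toggle.value, value: toggle.options?.[raw] ?? "" }];
    }
    return raw === "1" ? [{ key: toggle.value, value: "ON" }] : [];
  });
}
```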
@@ -367,13 +400,15 @@ export async function sendChat(chatProcessIndex = -1,arg:{
if(currentChat.note){
unformated.authorNote.push({
role: 'system',
content: risuChatParser(currentChat.note, {chara: currentChar})
content: risuChatParser(currentChat.note, {chara: currentChar}),
memo: 'authornote'
})
}
else if(getAuthorNoteDefaultText() !== ''){
unformated.authorNote.push({
role: 'system',
content: risuChatParser(getAuthorNoteDefaultText(), {chara: currentChar})
content: risuChatParser(getAuthorNoteDefaultText(), {chara: currentChar}),
memo: 'authornote'
})
}
@@ -403,7 +438,8 @@ export async function sendChat(chatProcessIndex = -1,arg:{
unformated.description.push({
role: 'system',
content: description
content: description,
memo: 'description',
})
if(nowChatroom.type === 'group'){
@@ -424,7 +460,8 @@ export async function sendChat(chatProcessIndex = -1,arg:{
for(const lorebook of normalActives){
unformated.lorebook.push({
role: lorebook.role,
content: risuChatParser(lorebook.prompt, {chara: currentChar})
content: risuChatParser(lorebook.prompt, {chara: currentChar}),
memo: 'lore',
})
}
@@ -448,7 +485,8 @@ export async function sendChat(chatProcessIndex = -1,arg:{
if(DBState.db.personaPrompt){
unformated.personaPrompt.push({
role: 'system',
content: risuChatParser(getPersonaPrompt(), {chara: currentChar})
content: risuChatParser(getPersonaPrompt(), {chara: currentChar}),
memo: 'persona',
})
}
@@ -473,7 +511,8 @@ export async function sendChat(chatProcessIndex = -1,arg:{
for(const lorebook of postEverythingLorebooks){
unformated.postEverything.push({
role: lorebook.role,
content: risuChatParser(lorebook.prompt, {chara: currentChar})
content: risuChatParser(lorebook.prompt, {chara: currentChar}),
memo: 'postEverything',
})
}
@@ -1061,6 +1100,12 @@ export async function sendChat(chatProcessIndex = -1,arg:{
}
}
type MemoType = 'persona' | 'description' | 'authornote' | 'supaMemory';
const promptBodyMap: Record<MemoType, string[]> = { persona: [], description: [], authornote: [], supaMemory: [] };
function pushPromptInfoBody(memo: MemoType, fmt: string) {
promptBodyMap[memo].push(risuChatParser(fmt));
}
if(promptTemplate){
const template = promptTemplate
@@ -1071,6 +1116,10 @@ export async function sendChat(chatProcessIndex = -1,arg:{
if(card.innerFormat && pmt.length > 0){
for(let i=0;i<pmt.length;i++){
pmt[i].content = risuChatParser(positionParser(card.innerFormat), {chara: currentChar}).replace('{{slot}}', pmt[i].content)
if(DBState.db.promptInfoInsideChat && DBState.db.promptTextInfoInsideChat){
pushPromptInfoBody(card.type, card.innerFormat)
}
}
}
@@ -1082,6 +1131,10 @@ export async function sendChat(chatProcessIndex = -1,arg:{
if(card.innerFormat && pmt.length > 0){
for(let i=0;i<pmt.length;i++){
pmt[i].content = risuChatParser(positionParser(card.innerFormat), {chara: currentChar}).replace('{{slot}}', pmt[i].content)
if(DBState.db.promptInfoInsideChat && DBState.db.promptTextInfoInsideChat){
pushPromptInfoBody(card.type, card.innerFormat)
}
}
}
@@ -1093,6 +1146,10 @@ export async function sendChat(chatProcessIndex = -1,arg:{
if(card.innerFormat && pmt.length > 0){
for(let i=0;i<pmt.length;i++){
pmt[i].content = risuChatParser(positionParser(card.innerFormat), {chara: currentChar}).replace('{{slot}}', pmt[i].content || card.defaultText || '')
if(DBState.db.promptInfoInsideChat && DBState.db.promptTextInfoInsideChat){
pushPromptInfoBody(card.type, card.innerFormat)
}
}
}
@@ -1208,6 +1265,10 @@ export async function sendChat(chatProcessIndex = -1,arg:{
if(card.innerFormat && pmt.length > 0){
for(let i=0;i<pmt.length;i++){
pmt[i].content = risuChatParser(card.innerFormat, {chara: currentChar}).replace('{{slot}}', pmt[i].content)
if(DBState.db.promptInfoInsideChat && DBState.db.promptTextInfoInsideChat){
pushPromptInfoBody('supaMemory', card.innerFormat)
}
}
}
@@ -1327,6 +1388,29 @@ export async function sendChat(chatProcessIndex = -1,arg:{
return true
}
function isPromptMemo(m: string): m is MemoType {
return ['persona', 'description', 'authornote', 'supaMemory'].includes(m);
}
if(DBState.db.promptInfoInsideChat && DBState.db.promptTextInfoInsideChat){
const promptBodyInfo: OpenAIChat[] = formated.flatMap(format => {
if (isPromptMemo(format.memo)) {
return promptBodyMap[format.memo].map(content => ({
role: format.role,
content,
}))
}
if (format.memo == null) {
return [format]
}
return []
})
promptInfo.promptText = promptBodyInfo
}
let result = ''
let emoChanged = false
let resendChat = false
@@ -1353,6 +1437,7 @@ export async function sendChat(chatProcessIndex = -1,arg:{
saying: currentChar.chaId,
time: Date.now(),
generationInfo,
promptInfo,
})
}
DBState.db.characters[selectedChar].chats[selectedChat].isStreaming = true
@@ -1432,7 +1517,8 @@ export async function sendChat(chatProcessIndex = -1,arg:{
data: result,
saying: currentChar.chaId,
time: Date.now(),
generationInfo
generationInfo,
promptInfo,
}
if(inlayResult.promise){
const p = await inlayResult.promise
@@ -1445,7 +1531,8 @@ export async function sendChat(chatProcessIndex = -1,arg:{
data: result,
saying: currentChar.chaId,
time: Date.now(),
generationInfo
generationInfo,
promptInfo,
})
const ind = DBState.db.characters[selectedChar].chats[selectedChat].message.length - 1
if(inlayResult.promise){

View File

@@ -15,6 +15,7 @@ import { Mutex } from "../mutex";
import { tokenize } from "../tokenizer";
import { fetchNative } from "../globalApi.svelte";
import { loadLoreBookV3Prompt } from './lorebook.svelte';
import { getPersonaPrompt, getUserName } from '../util';
let luaFactory:LuaFactory
let LuaSafeIds = new Set<string>()
@@ -461,6 +462,26 @@ export async function runLua(code:string, arg:{
return char.firstMessage
})
luaEngine.global.set('getPersonaName', (id:string) => {
if(!LuaSafeIds.has(id)){
return
}
return getUserName()
})
luaEngine.global.set('getPersonaDescription', (id:string) => {
if(!LuaSafeIds.has(id)){
return
}
const db = getDatabase()
const selectedChar = get(selectedCharID)
const char = db.characters[selectedChar]
return risuChatParser(getPersonaPrompt(), { chara: char })
})
luaEngine.global.set('getBackgroundEmbedding', async (id:string) => {
if(!LuaSafeIds.has(id)){
return

View File

@@ -1,27 +1,33 @@
import localforage from "localforage";
import { globalFetch } from "src/ts/globalApi.svelte";
import { runEmbedding } from "../transformers";
import { alertError } from "src/ts/alert";
import { appendLastPath } from "src/ts/util";
import { getDatabase } from "src/ts/storage/database.svelte";
export type HypaModel = 'custom'|'ada'|'openai3small'|'openai3large'|'MiniLM'|'MiniLMGPU'|'nomic'|'nomicGPU'|'bgeSmallEn'|'bgeSmallEnGPU'|'bgem3'|'bgem3GPU'|'multiMiniLM'|'multiMiniLMGPU'
export type HypaModel = 'ada'|'MiniLM'|'nomic'|'custom'|'nomicGPU'|'bgeSmallEn'|'bgeSmallEnGPU'|'bgem3'|'bgem3GPU'|'openai3small'|'openai3large'
const localModels = {
// bge-m3 is a heavy model for typical consumer hardware.
// If your GPU can't handle it, you'll see the error below:
// Failed to execute 'mapAsync' on 'GPUBuffer': [Device] is lost
export const localModels = {
models: {
'MiniLM':'Xenova/all-MiniLM-L6-v2',
'MiniLMGPU': "Xenova/all-MiniLM-L6-v2",
'nomic':'nomic-ai/nomic-embed-text-v1.5',
'nomicGPU':'nomic-ai/nomic-embed-text-v1.5',
'bgeSmallEn': 'BAAI/bge-small-en-v1.5',
'bgeSmallEnGPU': 'BAAI/bge-small-en-v1.5',
'bgem3': 'BAAI/bge-m3',
'bgem3GPU': 'BAAI/bge-m3',
'bgeSmallEn': 'Xenova/bge-small-en-v1.5',
'bgeSmallEnGPU': 'Xenova/bge-small-en-v1.5',
'bgem3': 'Xenova/bge-m3',
'bgem3GPU': 'Xenova/bge-m3',
'multiMiniLM': 'Xenova/paraphrase-multilingual-MiniLM-L12-v2',
'multiMiniLMGPU': 'Xenova/paraphrase-multilingual-MiniLM-L12-v2',
},
gpuModels:[
'MiniLMGPU',
'nomicGPU',
'bgeSmallEnGPU',
'bgem3GPU'
'bgem3GPU',
'multiMiniLMGPU',
]
}
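Both the CPU and GPU variants of each entry now point at the same Xenova repo; which backend runs is decided by membership in `gpuModels`. A minimal sketch of how a caller can resolve a `HypaModel` name (mirroring `getLocalEmbeds` in the new processor further down; the helper name is illustrative):

```ts
import { localModels, type HypaModel } from "./hypamemory";

// Illustrative helper: map a local HypaModel name to its Hugging Face repo and execution device.
function resolveLocalModel(model: HypaModel): { repo: string; device: "webgpu" | "wasm" } | null {
  const repo = (localModels.models as Record<string, string>)[model];
  if (!repo) return null; // not a local model — handled via an embeddings API instead
  const device = localModels.gpuModels.includes(model) ? "webgpu" : "wasm";
  return { repo, device };
}

// resolveLocalModel("multiMiniLMGPU")
//   → { repo: "Xenova/paraphrase-multilingual-MiniLM-L12-v2", device: "webgpu" }
```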
@@ -44,7 +50,7 @@ export class HypaProcesser{
else{
this.model = model
}
this.customEmbeddingUrl = customEmbeddingUrl || db.hypaCustomSettings.url
this.customEmbeddingUrl = customEmbeddingUrl?.trim() || db.hypaCustomSettings?.url?.trim() || ""
}
async embedDocuments(texts: string[]): Promise<VectorArray[]> {
@@ -80,10 +86,12 @@ export class HypaProcesser{
const db = getDatabase()
const fetchArgs = {
...(db.hypaCustomSettings.key ? {headers: {"Authorization": "Bearer " + db.hypaCustomSettings.key}} : {}),
headers: {
...(db.hypaCustomSettings?.key?.trim() ? {"Authorization": "Bearer " + db.hypaCustomSettings.key.trim()} : {})
},
body: {
"input": input,
...(db.hypaCustomSettings.model ? {"model": db.hypaCustomSettings.model} : {})
...(db.hypaCustomSettings?.model?.trim() ? {"model": db.hypaCustomSettings.model.trim()} : {})
}
};
@@ -99,7 +107,7 @@ export class HypaProcesser{
gf = await globalFetch("https://api.openai.com/v1/embeddings", {
headers: {
"Authorization": "Bearer " + db.supaMemoryKey || this.oaikey
"Authorization": "Bearer " + (this.oaikey?.trim() || db.supaMemoryKey?.trim())
},
body: {
"input": input,
@@ -134,7 +142,7 @@ export class HypaProcesser{
async addText(texts:string[]) {
const db = getDatabase()
const suffix = (this.model === 'custom' && db.hypaCustomSettings.model) ? `-${db.hypaCustomSettings.model}` : ""
const suffix = (this.model === 'custom' && db.hypaCustomSettings?.model?.trim()) ? `-${db.hypaCustomSettings.model.trim()}` : ""
for(let i=0;i<texts.length;i++){
const itm:memoryVector = await this.forage.getItem(texts[i] + '|' + this.model + suffix)
@@ -205,7 +213,8 @@ export class HypaProcesser{
return similarity(query1, query2)
}
}
function similarity(a:VectorArray, b:VectorArray) {
export function similarity(a:VectorArray, b:VectorArray) {
let dot = 0;
for(let i=0;i<a.length;i++){
dot += a[i] * b[i]
@@ -227,4 +236,4 @@ const chunkArray = <T>(arr: T[], chunkSize: number) =>
const chunk = chunks[chunkIndex] || [];
chunks[chunkIndex] = chunk.concat([elem]);
return chunks;
}, [] as T[][]);
}, [] as T[][]);

View File

@@ -0,0 +1,414 @@
import localforage from "localforage";
import { type HypaModel, localModels } from "./hypamemory";
import { TaskRateLimiter, TaskCanceledError } from "./taskRateLimiter";
import { runEmbedding } from "../transformers";
import { globalFetch } from "src/ts/globalApi.svelte";
import { getDatabase } from "src/ts/storage/database.svelte";
import { appendLastPath } from "src/ts/util";
export interface HypaProcessorV2Options {
model?: HypaModel;
customEmbeddingUrl?: string;
oaiKey?: string;
rateLimiter?: TaskRateLimiter;
}
export interface EmbeddingText<TMetadata> {
content: string;
metadata?: TMetadata;
}
export interface EmbeddingResult<TMetadata> extends EmbeddingText<TMetadata> {
embedding: EmbeddingVector;
}
export type EmbeddingVector = number[] | Float32Array;
export class HypaProcessorV2<TMetadata> {
private static readonly LOG_PREFIX = "[HypaProcessorV2]";
public readonly options: HypaProcessorV2Options;
public progressCallback: (queuedCount: number) => void = null;
private vectors: Map<string, EmbeddingResult<TMetadata>> = new Map();
private forage: LocalForage = localforage.createInstance({
name: "hypaVector",
});
public constructor(options?: HypaProcessorV2Options) {
const db = getDatabase();
this.options = {
model: db.hypaModel || "MiniLM",
customEmbeddingUrl: db.hypaCustomSettings?.url?.trim() || "",
oaiKey: db.supaMemoryKey?.trim() || "",
rateLimiter: new TaskRateLimiter(),
...options,
};
}
public async addTexts(ebdTexts: EmbeddingText<TMetadata>[]): Promise<void> {
await this.getEmbeds(ebdTexts, true);
}
public async similaritySearchScored(
query: string
): Promise<[EmbeddingResult<TMetadata>, number][]> {
const results = await this.similaritySearchScoredBatch([query]);
return results[0];
}
public async similaritySearchScoredBatch(
queries: string[]
): Promise<[EmbeddingResult<TMetadata>, number][][]> {
if (queries.length === 0) {
return [];
}
// Remove duplicate queries
const uniqueQueries = [...new Set(queries)];
// Convert queries to EmbeddingText array
const ebdTexts: EmbeddingText<TMetadata>[] = uniqueQueries.map((query) => ({
content: query,
}));
// Get query embeddings (don't save to memory)
const ebdResults = await this.getEmbeds(ebdTexts, false);
const scoredResultsMap = new Map<
string,
[EmbeddingResult<TMetadata>, number][]
>();
// Calculate similarity for each unique query
for (let i = 0; i < uniqueQueries.length; i++) {
const ebdResult = ebdResults[i];
const scoredVectors = Array.from(this.vectors.values())
.map((vector): [EmbeddingResult<TMetadata>, number] => [
vector,
this.similarity(ebdResult.embedding, vector.embedding),
])
.sort((a, b) => b[1] - a[1]);
scoredResultsMap.set(uniqueQueries[i], scoredVectors);
}
return queries.map((query) => scoredResultsMap.get(query));
}
private async getEmbeds(
ebdTexts: EmbeddingText<TMetadata>[],
saveToMemory: boolean = true
): Promise<EmbeddingResult<TMetadata>[]> {
if (ebdTexts.length === 0) {
return [];
}
const resultMap: Map<string, EmbeddingResult<TMetadata>> = new Map();
const toEmbed: EmbeddingText<TMetadata>[] = [];
// Load cache
const loadPromises = ebdTexts.map(async (item, index) => {
const { content, metadata } = item;
// Use if already in memory
if (this.vectors.has(content)) {
resultMap.set(content, this.vectors.get(content));
return;
}
try {
const cached = await this.forage.getItem<EmbeddingResult<TMetadata>>(
this.getCacheKey(content)
);
if (cached) {
// Debug log for cache hit
console.debug(
HypaProcessorV2.LOG_PREFIX,
`Cache hit for getting embedding ${index} with model ${this.options.model}`
);
// Add metadata
cached.metadata = metadata;
// Save to memory
if (saveToMemory) {
this.vectors.set(content, cached);
}
resultMap.set(content, cached);
} else {
toEmbed.push(item);
}
} catch (error) {
toEmbed.push(item);
}
});
await Promise.all(loadPromises);
if (toEmbed.length === 0) {
return ebdTexts.map((item) => resultMap.get(item.content));
}
// Chunking array
const chunkSize = await this.getOptimalChunkSize();
// Debug log for optimal chunk size
console.debug(
HypaProcessorV2.LOG_PREFIX,
`Optimal chunk size for ${this.options.model}: ${chunkSize}`
);
const chunks = this.chunkArray(toEmbed, chunkSize);
if (this.isLocalModel()) {
// Local model: Sequential processing
for (let i = 0; i < chunks.length; i++) {
// Progress callback
this.progressCallback?.(chunks.length - i - 1);
const chunk = chunks[i];
const embeddings = await this.getLocalEmbeds(
chunk.map((item) => item.content)
);
const savePromises = embeddings.map(async (embedding, j) => {
const { content, metadata } = chunk[j];
const ebdResult: EmbeddingResult<TMetadata> = {
content,
embedding,
metadata,
};
// Save to DB
await this.forage.setItem(this.getCacheKey(content), {
content,
embedding,
});
// Save to memory
if (saveToMemory) {
this.vectors.set(content, ebdResult);
}
resultMap.set(content, ebdResult);
});
await Promise.all(savePromises);
}
} else {
// API model: Parallel processing
const embeddingTasks = chunks.map((chunk) => {
const contents = chunk.map((item) => item.content);
return () => this.getAPIEmbeds(contents);
});
// Progress callback
this.options.rateLimiter.taskQueueChangeCallback = this.progressCallback;
const batchResult = await this.options.rateLimiter.executeBatch<
EmbeddingVector[]
>(embeddingTasks);
const errors: Error[] = [];
const chunksSavePromises = batchResult.results.map(async (result, i) => {
if (!result.success) {
errors.push(result.error);
return;
}
if (!result.data) {
errors.push(new Error("No embeddings found in the response."));
return;
}
const chunk = chunks[i];
const savePromises = result.data.map(async (embedding, j) => {
const { content, metadata } = chunk[j];
const ebdResult: EmbeddingResult<TMetadata> = {
content,
embedding,
metadata,
};
// Save to DB
await this.forage.setItem(this.getCacheKey(content), {
content,
embedding,
});
// Save to memory
if (saveToMemory) {
this.vectors.set(content, ebdResult);
}
resultMap.set(content, ebdResult);
});
await Promise.all(savePromises);
});
await Promise.all(chunksSavePromises);
// Throw major error if there are errors
if (errors.length > 0) {
const majorError =
errors.find((error) => !(error instanceof TaskCanceledError)) ||
errors[0];
throw majorError;
}
}
return ebdTexts.map((item) => resultMap.get(item.content));
}
private similarity(a: EmbeddingVector, b: EmbeddingVector): number {
let dot = 0;
let magA = 0;
let magB = 0;
for (let i = 0; i < a.length; i++) {
dot += a[i] * b[i];
magA += a[i] * a[i];
magB += b[i] * b[i];
}
return dot / (Math.sqrt(magA) * Math.sqrt(magB));
}
private getCacheKey(content: string): string {
const db = getDatabase();
const suffix =
this.options.model === "custom" && db.hypaCustomSettings?.model?.trim()
? `-${db.hypaCustomSettings.model.trim()}`
: "";
return `${content}|${this.options.model}${suffix}`;
}
private async getOptimalChunkSize(): Promise<number> {
// API
if (!this.isLocalModel()) {
return 50;
}
const isMobile = /Android|iPhone|iPad|iPod|webOS/i.test(
navigator.userAgent
);
// WebGPU
if ("gpu" in navigator) {
return isMobile ? 5 : 10;
}
// WASM
const cpuCores = navigator.hardwareConcurrency || 4;
const baseChunkSize = isMobile ? Math.floor(cpuCores / 2) : cpuCores;
return Math.min(baseChunkSize, 10);
}
private isLocalModel(): boolean {
return Object.keys(localModels.models).includes(this.options.model);
}
private chunkArray<T>(array: T[], size: number): T[][] {
const chunks: T[][] = [];
for (let i = 0; i < array.length; i += size) {
chunks.push(array.slice(i, i + size));
}
return chunks;
}
private async getLocalEmbeds(contents: string[]): Promise<EmbeddingVector[]> {
const results: Float32Array[] = await runEmbedding(
contents,
localModels.models[this.options.model],
localModels.gpuModels.includes(this.options.model) ? "webgpu" : "wasm"
);
return results;
}
private async getAPIEmbeds(contents: string[]): Promise<EmbeddingVector[]> {
const db = getDatabase();
let response = null;
if (this.options.model === "custom") {
if (!this.options.customEmbeddingUrl) {
throw new Error("Custom model requires a Custom Server URL");
}
const replaceUrl = this.options.customEmbeddingUrl.endsWith("/embeddings")
? this.options.customEmbeddingUrl
: appendLastPath(this.options.customEmbeddingUrl, "embeddings");
const fetchArgs = {
headers: {
...(db.hypaCustomSettings?.key?.trim()
? { Authorization: "Bearer " + db.hypaCustomSettings.key.trim() }
: {}),
},
body: {
input: contents,
...(db.hypaCustomSettings?.model?.trim()
? { model: db.hypaCustomSettings.model.trim() }
: {}),
},
};
response = await globalFetch(replaceUrl, fetchArgs);
} else if (
["ada", "openai3small", "openai3large"].includes(this.options.model)
) {
const models = {
ada: "text-embedding-ada-002",
openai3small: "text-embedding-3-small",
openai3large: "text-embedding-3-large",
};
const fetchArgs = {
headers: {
Authorization:
"Bearer " +
(this.options.oaiKey?.trim() || db.supaMemoryKey?.trim()),
},
body: {
input: contents,
model: models[this.options.model],
},
};
response = await globalFetch(
"https://api.openai.com/v1/embeddings",
fetchArgs
);
} else {
throw new Error(`Unsupported model: ${this.options.model}`);
}
if (!response.ok || !response.data.data) {
throw new Error(JSON.stringify(response.data));
}
const embeddings: EmbeddingVector[] = response.data.data.map(
(item: { embedding: EmbeddingVector }) => {
if (!item.embedding) {
throw new Error("No embeddings found in the response.");
}
return item.embedding;
}
);
return embeddings;
}
}
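A hedged usage sketch of the new processor (the import path and the metadata shape are assumptions; the diff does not show the new file's name):

```ts
// Illustrative only — the path and SummaryMeta are assumptions; the calls mirror the class above.
import { HypaProcessorV2 } from "./hypav2processor";

interface SummaryMeta { chatIndex: number }

async function rankSummaries(
  summaries: { text: string; chatIndex: number }[],
  query: string
) {
  const processor = new HypaProcessorV2<SummaryMeta>(); // defaults (model, keys, rate limiter) come from the database
  processor.progressCallback = (queued) => console.debug(`${queued} embedding chunks still queued`);

  // Embed and cache the summaries (persisted in the "hypaVector" localforage store).
  await processor.addTexts(
    summaries.map((s) => ({ content: s.text, metadata: { chatIndex: s.chatIndex } }))
  );

  // Score everything held in memory against the query, best match first.
  const scored = await processor.similaritySearchScored(query);
  return scored.slice(0, 5).map(([result, score]) => ({
    score,
    chatIndex: result.metadata?.chatIndex,
    text: result.content,
  }));
}
```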

File diff suppressed because it is too large

View File

@@ -0,0 +1,188 @@
export interface TaskRateLimiterOptions {
tasksPerMinute?: number;
maxConcurrentTasks?: number;
failFast?: boolean;
}
export interface BatchResult<TData> {
results: TaskResult<TData>[];
successCount: number;
failureCount: number;
allSucceeded: boolean;
}
export interface TaskResult<TData> {
success: boolean;
data?: TData;
error?: Error;
}
export class TaskRateLimiter {
private static readonly LOG_PREFIX = "[TaskRateLimiter]";
public readonly options: TaskRateLimiterOptions;
public taskQueueChangeCallback: (queuedCount: number) => void = null;
private timestamps: number[] = [];
private active: number = 0;
private queue: Array<{
task: () => Promise<TaskResult<any>>;
resolve: (result: TaskResult<any>) => void;
}> = [];
public constructor(options?: TaskRateLimiterOptions) {
this.options = {
tasksPerMinute: 20,
maxConcurrentTasks: 5,
failFast: true,
...options,
};
if (this.options.maxConcurrentTasks > this.options.tasksPerMinute) {
throw new Error("maxConcurrentTasks must be less than tasksPerMinute");
}
}
public async executeTask<TData>(
task: () => Promise<TData>
): Promise<TaskResult<TData>> {
return new Promise<TaskResult<TData>>((resolve) => {
this.queue.push({
task: async () => {
try {
const data = await task();
return { success: true, data };
} catch (error) {
return { success: false, error };
}
},
resolve,
});
this.taskQueueChangeCallback?.(this.queue.length);
this.processNextFromQueue();
});
}
public async executeBatch<TData>(
tasks: Array<() => Promise<TData>>
): Promise<BatchResult<TData>> {
const taskResults = await Promise.all(
tasks.map((task) => this.executeTask(task))
);
const successCount = taskResults.filter((r) => r.success).length;
const failureCount = taskResults.length - successCount;
return {
results: taskResults,
successCount,
failureCount,
allSucceeded: failureCount === 0,
};
}
public cancelPendingTasks(reason: string): void {
const error = new TaskCanceledError(reason);
while (this.queue.length > 0) {
const { resolve } = this.queue.shift();
resolve({ success: false, error });
}
this.taskQueueChangeCallback?.(this.queue.length);
}
public get queuedTaskCount(): number {
return this.queue.length;
}
private processNextFromQueue(): void {
if (this.queue.length === 0) return;
if (this.active >= this.options.maxConcurrentTasks) {
// Debug log for concurrency limit hit
console.debug(
TaskRateLimiter.LOG_PREFIX,
"Concurrency limit hit:",
"\nTasks in last minute:",
this.timestamps.length + "/" + this.options.tasksPerMinute,
"\nActive tasks:",
this.active + "/" + this.options.maxConcurrentTasks,
"\nWaiting tasks in queue:",
this.queue.length
);
return;
}
this.timestamps = this.timestamps.filter(
(ts) => Date.now() - ts <= 60 * 1000
);
if (this.timestamps.length >= this.options.tasksPerMinute) {
const oldestTimestamp = Math.min(...this.timestamps);
const timeUntilExpiry = Math.max(
100,
60 * 1000 - (Date.now() - oldestTimestamp)
);
// Debug log for rate limit hit
console.debug(
TaskRateLimiter.LOG_PREFIX,
"Rate limit hit:",
"\nTasks in last minute:",
this.timestamps.length + "/" + this.options.tasksPerMinute,
"\nActive tasks:",
this.active + "/" + this.options.maxConcurrentTasks,
"\nWaiting tasks in queue:",
this.queue.length,
"\nWill retry in:",
timeUntilExpiry + "ms"
);
// Wait until rate limit window advances before retrying
setTimeout(() => this.processNextFromQueue(), timeUntilExpiry);
return;
}
const { task, resolve } = this.queue.shift();
this.active++;
this.taskQueueChangeCallback?.(this.queue.length);
this.timestamps.push(Date.now());
// Debug log for task start
console.debug(
TaskRateLimiter.LOG_PREFIX,
"Task started:",
"\nTasks in last minute:",
this.timestamps.length + "/" + this.options.tasksPerMinute,
"\nActive tasks:",
this.active + "/" + this.options.maxConcurrentTasks,
"\nWaiting tasks in queue:",
this.queue.length
);
task()
.then((result) => {
resolve(result);
if (!result.success && this.options.failFast) {
this.cancelPendingTasks("Task canceled due to previous failure");
}
})
.finally(() => {
this.active--;
// Prevents call stack overflow while maintaining concurrency limits
queueMicrotask(() => this.processNextFromQueue());
});
}
}
export class TaskCanceledError extends Error {
public readonly name: string;
public constructor(message: string) {
super(message);
this.name = "TaskCanceledError";
}
}
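A short usage sketch of the limiter as the embedding code above consumes it (the `embed` callback is a stand-in for an API call):

```ts
import { TaskRateLimiter, TaskCanceledError } from "./taskRateLimiter";

// Illustrative: run many embedding requests under the default limits
// (20 tasks/minute, 5 concurrent, fail-fast cancellation of the rest on first failure).
async function embedAllChunks(
  chunks: string[][],
  embed: (chunk: string[]) => Promise<number[][]>
): Promise<number[][][]> {
  const limiter = new TaskRateLimiter({ tasksPerMinute: 20, maxConcurrentTasks: 5, failFast: true });
  limiter.taskQueueChangeCallback = (queued) => console.debug(`${queued} chunks waiting`);

  const batch = await limiter.executeBatch(chunks.map((chunk) => () => embed(chunk)));

  if (!batch.allSucceeded) {
    // Surface the first real failure; TaskCanceledError entries are just collateral cancellations.
    const fatal = batch.results.find((r) => !r.success && !(r.error instanceof TaskCanceledError));
    throw (fatal ?? batch.results.find((r) => !r.success))!.error;
  }
  return batch.results.map((r) => r.data!);
}
```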

View File

@@ -60,12 +60,19 @@ export const runEmbedding = async (texts: string[], model:EmbeddingModel = 'Xeno
console.log('running embedding')
let embeddingModelQuery = model + device
if(!extractor || embeddingModelQuery !== lastEmbeddingModelQuery){
// Dispose old extractor
if(extractor) {
await extractor.dispose()
}
extractor = await pipeline('feature-extraction', model, {
// The webgpu default dtype is fp32, so explicitly request q8 — the default dtype used under wasm.
...(device === 'webgpu' ? { dtype: "q8" } : {}),
device: device,
progress_callback: (progress) => {
console.log(progress)
}
});
lastEmbeddingModelQuery = embeddingModelQuery
console.log('extractor loaded')
}
let result = await extractor(texts, { pooling: 'mean', normalize: true });
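A small direct-call sketch (the third `device` argument is inferred from the `getLocalEmbeds` call earlier in this diff); note that the pipeline is rebuilt, and the old extractor disposed, only when the model+device pair changes:

```ts
import { runEmbedding } from "../transformers"; // same import path used elsewhere in this diff

async function demo(): Promise<void> {
  const vectors = await runEmbedding(
    ["hello world", "good morning"],
    "Xenova/all-MiniLM-L6-v2",
    "wasm" // or "webgpu" where supported
  );
  console.log(vectors.length, vectors[0].length); // 2 embeddings, one Float32Array each
}
```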

src/ts/process/webllm.ts (new file, 60 lines)
View File

@@ -0,0 +1,60 @@
import {
type ChatCompletionMessageParam,
type ChatCompletionRequestNonStreaming,
MLCEngine,
CreateMLCEngine,
} from "@mlc-ai/web-llm";
let engine: MLCEngine = null;
let lastModel: string = null;
export async function chatCompletion(
messages: { role: string; content: string }[],
model: string,
config: Record<string, any>
): Promise<string> {
try {
if (!engine || lastModel !== model) {
if (engine) engine.unload();
const initProgressCallback = (progress) => {
console.log("[WebLLM]", progress);
};
engine = await CreateMLCEngine(
model,
{
initProgressCallback,
},
{ context_window_size: 16384 }
);
lastModel = model;
}
const request: ChatCompletionRequestNonStreaming = {
messages: messages as ChatCompletionMessageParam[],
temperature: 0,
max_tokens: 4096,
...config,
};
const completion = await engine.chat.completions.create(request);
const content = completion.choices[0].message.content;
return content;
} catch (error) {
if (error instanceof Error) {
throw error;
}
throw new Error(JSON.stringify(error));
}
}
export async function unloadEngine(): Promise<void> {
if (!engine) return;
await engine.unload();
engine = null;
lastModel = null;
}
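An illustrative caller for the new module (the model id below is an assumption taken from web-llm's prebuilt catalog, not from this repo):

```ts
import { chatCompletion, unloadEngine } from "./webllm";

async function summarizeLocally(text: string): Promise<string> {
  const reply = await chatCompletion(
    [
      { role: "system", content: "Summarize the user's text in two sentences." },
      { role: "user", content: text },
    ],
    "Llama-3.2-1B-Instruct-q4f16_1-MLC", // assumed prebuilt model id
    { temperature: 0.2 }                 // overrides the defaults spread into the request
  );
  await unloadEngine(); // free GPU memory once done
  return reply;
}
```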

View File

@@ -11,6 +11,7 @@ import { prebuiltNAIpresets, prebuiltPresets } from '../process/templates/templa
import { defaultColorScheme, type ColorScheme } from '../gui/colorscheme';
import type { PromptItem, PromptSettings } from '../process/prompt';
import type { OobaChatCompletionRequestParams } from '../model/ooba';
import { type HypaV3Settings, type HypaV3Preset, createHypaV3Preset } from '../process/memory/hypav3'
export let appVer = "159.0.0"
export let webAppSubVer = ''
@@ -515,17 +516,21 @@ export function setDatabase(data:Database){
data.checkCorruption ??= true
data.OaiCompAPIKeys ??= {}
data.reasoningEffort ??= 0
data.hypaV3Settings = {
memoryTokensRatio: data.hypaV3Settings?.memoryTokensRatio ?? 0.2,
extraSummarizationRatio: data.hypaV3Settings?.extraSummarizationRatio ?? 0,
maxChatsPerSummary: data.hypaV3Settings?.maxChatsPerSummary ?? 4,
recentMemoryRatio: data.hypaV3Settings?.recentMemoryRatio ?? 0.4,
similarMemoryRatio: data.hypaV3Settings?.similarMemoryRatio ?? 0.4,
enableSimilarityCorrection: data.hypaV3Settings?.enableSimilarityCorrection ?? false,
preserveOrphanedMemory: data.hypaV3Settings?.preserveOrphanedMemory ?? false,
processRegexScript: data.hypaV3Settings?.processRegexScript ?? false,
doNotSummarizeUserMessage: data.hypaV3Settings?.doNotSummarizeUserMessage ?? false
data.hypaV3Presets ??= [
createHypaV3Preset("Default", {
summarizationPrompt: data.supaMemoryPrompt ? data.supaMemoryPrompt : "",
...data.hypaV3Settings
})
]
if (data.hypaV3Presets.length > 0) {
data.hypaV3Presets = data.hypaV3Presets.map((preset, i) =>
createHypaV3Preset(
preset.name || `Preset ${i + 1}`,
preset.settings || {}
)
)
}
data.hypaV3PresetId ??= 0
data.returnCSSError ??= true
data.useExperimentalGoogleTranslator ??= false
if(data.antiClaudeOverload){ //migration
@@ -535,7 +540,7 @@ export function setDatabase(data:Database){
data.hypaCustomSettings = {
url: data.hypaCustomSettings?.url ?? "",
key: data.hypaCustomSettings?.key ?? "",
model: data.hypaCustomSettings?.model ?? "",
model: data.hypaCustomSettings?.model ?? ""
}
data.doNotChangeSeperateModels ??= false
data.modelTools ??= []
@@ -960,17 +965,10 @@ export interface Database{
showPromptComparison:boolean
checkCorruption:boolean
hypaV3:boolean
hypaV3Settings: {
memoryTokensRatio: number
extraSummarizationRatio: number
maxChatsPerSummary: number
recentMemoryRatio: number
similarMemoryRatio: number
enableSimilarityCorrection: boolean
preserveOrphanedMemory: boolean
processRegexScript: boolean
doNotSummarizeUserMessage: boolean
}
hypaV3Settings: HypaV3Settings // legacy
hypaV3Presets: HypaV3Preset[]
hypaV3PresetId: number
showMenuHypaMemoryModal:boolean
OaiCompAPIKeys: {[key:string]:string}
inlayErrorResponse:boolean
reasoningEffort:number
@@ -1026,6 +1024,8 @@ export interface Database{
igpPrompt:string
useTokenizerCaching:boolean
showMenuHypaMemoryModal:boolean
promptInfoInsideChat:boolean
promptTextInfoInsideChat:boolean
}
interface SeparateParameters{
@@ -1504,6 +1504,7 @@ export interface Message{
chatId?:string
time?: number
generationInfo?: MessageGenerationInfo
promptInfo?: MessagePresetInfo
name?:string
otherUser?:boolean
}
@@ -1516,6 +1517,12 @@ export interface MessageGenerationInfo{
maxContext?: number
}
export interface MessagePresetInfo{
promptName?: string,
promptToggles?: {key: string, value: string}[],
promptText?: OpenAIChat[],
}
interface AINsettings{
top_p: number,
rep_pen: number,
@@ -1882,6 +1889,7 @@ import type { Parameter } from '../process/request';
import type { HypaModel } from '../process/memory/hypamemory';
import type { SerializableHypaV3Data } from '../process/memory/hypav3';
import { defaultHotkeys, type Hotkey } from '../defaulthotkeys';
import type { OpenAIChat } from '../process/index.svelte';
export async function downloadPreset(id:number, type:'json'|'risupreset'|'return' = 'json'){
saveCurrentPreset()

View File

@@ -50,6 +50,13 @@ export const alertStore = writable({
type: 'none',
msg: 'n',
} as alertData)
export const hypaV3ModalOpen = writable(false)
export const hypaV3ProgressStore = writable({
open: false,
miniMsg: '',
msg: '',
subMsg: '',
})
export const selIdState = $state({
selId: -1
})
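A minimal sketch of driving the new progress modal from a long-running memory pass (the import path is an assumption):

```ts
import { hypaV3ProgressStore } from "src/ts/stores.svelte"; // path assumed

// Open the modal and update it as work progresses…
hypaV3ProgressStore.set({
  open: true,
  miniMsg: "3/12",
  msg: "Summarizing chat history",
  subMsg: "Waiting for rate limiter…",
});

// …and close it when the pass finishes.
hypaV3ProgressStore.update((s) => ({ ...s, open: false }));
```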