From 5b0ca460a5d5335b01ddc45c0dfeb035fde9ae1b Mon Sep 17 00:00:00 2001 From: YH_KIM Date: Sat, 3 May 2025 16:38:04 +0900 Subject: [PATCH 01/11] Support numeric values in inputs and enhance image generation configs Updated `SelectInput` and `OptionInput` to allow `value` as `string | number`. Extended image generation configurations with new features, including `cfg_rescale`, `noise_schedule`, and vibe data handling. Improved the setup for reference images and added support for mnemonist dependency in the package manager. --- pnpm-lock.yaml | 15 ++ src/lib/Setting/Pages/OtherBotSettings.svelte | 203 ++++++++++++++---- src/lib/UI/GUI/OptionInput.svelte | 2 +- src/lib/UI/GUI/SelectInput.svelte | 2 +- src/test/runTest.ts | 12 +- src/ts/process/stableDiff.ts | 78 +++++-- src/ts/storage/database.svelte.ts | 44 +++- 7 files changed, 284 insertions(+), 72 deletions(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c460c2c9..155294d5 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -158,6 +158,9 @@ importers: ml-distance: specifier: ^4.0.1 version: 4.0.1 + mnemonist: + specifier: ^0.40.3 + version: 0.40.3 mobile-drag-drop: specifier: 3.0.0-rc.0 version: 3.0.0-rc.0 @@ -2756,6 +2759,9 @@ packages: ml-tree-similarity@1.0.0: resolution: {integrity: sha512-XJUyYqjSuUQkNQHMscr6tcjldsOoAekxADTplt40QKfwW6nd++1wHWV9AArl0Zvw/TIHgNaZZNvr8QGvE8wLRg==} + mnemonist@0.40.3: + resolution: {integrity: sha512-Vjyr90sJ23CKKH/qPAgUKicw/v6pRoamxIEDFOF8uSgFME7DqPRpHgRTejWVjkdGg5dXj0/NyxZHZ9bcjH+2uQ==} + mobile-drag-drop@3.0.0-rc.0: resolution: {integrity: sha512-f8wIDTbBYLBW/+5sei1cqUE+StyDpf/LP+FRZELlVX6tmOOmELk84r3wh1z3woxCB9G5octhF06K5COvFjGgqg==} @@ -2900,6 +2906,9 @@ packages: object-inspect@1.13.1: resolution: {integrity: sha512-5qoj1RUiKOMsCCNLV1CBiPYE10sziTsnmNxkAI/rZhiD63CF7IqdFGC/XzjWjpSgLf0LxXX3bDFIh0E18f6UhQ==} + obliterator@2.0.5: + resolution: {integrity: sha512-42CPE9AhahZRsMNslczq0ctAEtqk8Eka26QofnqC346BZdHDySk3LWka23LI7ULIw11NmltpiLagIq8gBozxTw==} + ollama@0.5.0: resolution: {integrity: sha512-CRtRzsho210EGdK52GrUMohA2pU+7NbgEaBG3DcYeRmvQthDO7E2LHOkLlUUeaYUlNmEd8icbjC02ug9meSYnw==} @@ -6505,6 +6514,10 @@ snapshots: binary-search: 1.3.6 num-sort: 2.1.0 + mnemonist@0.40.3: + dependencies: + obliterator: 2.0.5 + mobile-drag-drop@3.0.0-rc.0: {} modify-values@1.0.1: {} @@ -6665,6 +6678,8 @@ snapshots: object-inspect@1.13.1: {} + obliterator@2.0.5: {} + ollama@0.5.0: dependencies: whatwg-fetch: 3.6.20 diff --git a/src/lib/Setting/Pages/OtherBotSettings.svelte b/src/lib/Setting/Pages/OtherBotSettings.svelte index 13b8e9bd..8dbd65e4 100644 --- a/src/lib/Setting/Pages/OtherBotSettings.svelte +++ b/src/lib/Setting/Pages/OtherBotSettings.svelte @@ -3,7 +3,8 @@ import { language } from "src/lang"; import Help from "src/lib/Others/Help.svelte"; import { selectSingleFile } from "src/ts/util"; - + import { alertError } from "src/ts/alert"; + import { DBState } from 'src/ts/stores.svelte'; import { isTauri, saveAsset } from "src/ts/globalApi.svelte"; import NumberInput from "src/lib/UI/GUI/NumberInput.svelte"; @@ -39,6 +40,7 @@ autoSmea:false, legacy_uc:false, use_coords:false, + cfg_rescale:0, v4_prompt:{ caption:{ base_caption:'', @@ -53,7 +55,12 @@ char_captions:[] }, legacy_uc:false, - } + }, + reference_image_multiple: [], + reference_strength_multiple: [0.7], + vibe_data: undefined, + vibe_model_selection: undefined, + noise_schedule: 'karras' } if (DBState.db.NAIImgConfig.sampler === 'ddim_v3'){ DBState.db.NAIImgConfig.sm = false @@ -69,7 +76,7 @@ untrack(() => { 
DBState.db.hypaV3Settings.recentMemoryRatio = newValue; - + if (newValue + DBState.db.hypaV3Settings.similarMemoryRatio > 1) { DBState.db.hypaV3Settings.similarMemoryRatio = 1 - newValue; } @@ -162,7 +169,7 @@ ComfyUI (Legacy) {/if} - + {#if DBState.db.sdProvider === 'webui'} You must use WebUI with --api flag You must use WebUI without agpl license or use unmodified version with agpl license to observe the contents of the agpl license. @@ -173,17 +180,17 @@ Steps - + CFG Scale - + Width Height Sampler - +
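The `$effect.pre` defaults above feed directly into the NovelAI image request this patch extends. A rough sketch of the added config fields, inferred only from what this diff initializes — not the official NovelAI schema:

```ts
// Field names come from the defaults initialized above; types are inferred.
// A sketch for orientation, not the full NAIImgConfig type.
interface NAIImgConfigAdditions {
  cfg_rescale: number;                    // "Prompt Guidance Rescale", 0 by default
  noise_schedule: "karras" | "exponential" | "polyexponential";
  reference_image_multiple: string[];     // multiple reference images
  reference_strength_multiple: number[];  // one strength per image, default [0.7]
  vibe_data?: unknown;                    // imported vibe payload (see the vibe UI below)
  vibe_model_selection?: "v4full" | "v4curated"; // keys inferred from the handler below
}
```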
@@ -250,6 +257,13 @@ CFG scale + Noise Schedule + + karras + exponential + polyexponential + + {#if !DBState.db.NAII2I || DBState.db.NAIImgConfig.sampler !== 'ddim_v3'} {:else if DBState.db.NAIImgModel === 'nai-diffusion-4-full' @@ -260,13 +274,17 @@ {#if DBState.db.NAIImgModel === 'nai-diffusion-4-full' || DBState.db.NAIImgModel === 'nai-diffusion-4-curated-preview'} + + Prompt Guidance Rescale + + + - {/if} @@ -308,43 +326,138 @@ - {#if DBState.db.NAIREF} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Vibe + + + {#if DBState.db.NAIImgConfig.vibe_data} +
+ Vibe Preview + +
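The handlers just below index `vibe_data.encodings[model][key].params.information_extracted`. From those accessors alone, an imported vibe file looks roughly like this — a sketch inferred from this diff, not an official NovelAI format:

```ts
// Inferred from the accessors in the vibe-model change handler below.
interface VibeData {
  encodings?: {
    v4full?: Record<string, { params: { information_extracted: number } }>;
    v4curated?: Record<string, { params: { information_extracted: number } }>;
  };
}
```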
+ + Vibe Model + { + // When vibe model changes, set InfoExtracted to the first value + if (DBState.db.NAIImgConfig.vibe_data?.encodings && + DBState.db.NAIImgConfig.vibe_model_selection && + DBState.db.NAIImgConfig.vibe_data.encodings[DBState.db.NAIImgConfig.vibe_model_selection]) { + const encodings = DBState.db.NAIImgConfig.vibe_data.encodings[DBState.db.NAIImgConfig.vibe_model_selection]; + const firstKey = Object.keys(encodings)[0]; + if (firstKey) { + DBState.db.NAIImgConfig.InfoExtracted = Number(encodings[firstKey].params.information_extracted); + } + } + }}> + {#if DBState.db.NAIImgConfig.vibe_data.encodings?.v4full} + nai-diffusion-4-full + {/if} + {#if DBState.db.NAIImgConfig.vibe_data.encodings?.v4curated} + nai-diffusion-4-curated + {/if} + Information Extracted - - {DBState.db.NAIImgConfig.InfoExtracted} - Reference Strength - - {DBState.db.NAIImgConfig.RefStrength} - - - - - Reference image - + + + Reference Strength Multiple + {/if} {/if} @@ -468,16 +581,16 @@ ElevenLabs API key - + VOICEVOX URL - + OpenAI Key NovelAI API key - + Huggingface Key @@ -679,4 +792,4 @@ {/if} -{/if} \ No newline at end of file +{/if} diff --git a/src/lib/UI/GUI/OptionInput.svelte b/src/lib/UI/GUI/OptionInput.svelte index 2112e776..4494e294 100644 --- a/src/lib/UI/GUI/OptionInput.svelte +++ b/src/lib/UI/GUI/OptionInput.svelte @@ -1,7 +1,7 @@ -
+
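The body of this hunk did not survive extraction. Going by the commit message ("allow `value` as `string | number`"), the change is presumably a one-line prop type widening, along these lines:

```ts
// Hypothetical reconstruction of the OptionInput/SelectInput prop change:
// before
export let value: string;
// after — numeric option values (steps, CFG scale, etc.) no longer need casting
export let value: string | number;
```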
@@ -877,13 +874,9 @@ class="p-2 text-zinc-400 hover:text-zinc-200 transition-colors" tabindex="-1" onclick={() => { - alertStore.set({ - type: "none", - msg: "", - }); - - settingsOpen.set(true); - SettingsMenuIndex.set(2); // Other bot settings + $hypaV3ModalOpen = false; + $settingsOpen = true; + $SettingsMenuIndex = 2; // Other bot settings }} > @@ -906,8 +899,6 @@ summaries: [], lastSelectedSummaries: [], }; - } else { - showHypaV3Alert(); } }} > @@ -919,10 +910,7 @@ class="p-2 text-zinc-400 hover:text-zinc-200 transition-colors" tabindex="-1" onclick={() => { - alertStore.set({ - type: "none", - msg: "", - }); + $hypaV3ModalOpen = false; }} > @@ -960,8 +948,6 @@ ) ); } - - showHypaV3Alert(); }} > {language.hypaV3Modal.convertButton} @@ -976,7 +962,7 @@ {:else if searchUIState} -
+
{ if ( - await alertConfirm(language.hypaV3Modal.deleteThisConfirmMessage) + await alertConfirm( + language.hypaV3Modal.deleteThisConfirmMessage + ) ) { - hypaV3DataState.summaries = hypaV3DataState.summaries.filter( - (_, index) => index !== i - ); + hypaV3DataState.summaries = + hypaV3DataState.summaries.filter( + (_, index) => index !== i + ); } - - showHypaV3Alert(); }} > @@ -1122,8 +1108,6 @@ ) { hypaV3DataState.summaries.splice(i + 1); } - - showHypaV3Alert(); }} > diff --git a/src/lib/Others/HypaV3Progress.svelte b/src/lib/Others/HypaV3Progress.svelte new file mode 100644 index 00000000..35d098ab --- /dev/null +++ b/src/lib/Others/HypaV3Progress.svelte @@ -0,0 +1,55 @@ + + +{#if isExpanded} +
+ +
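Most of this new component's body was lost in extraction. From the `{#if isExpanded}` template above and the `hypaV3ProgressStore.set({ open, miniMsg, msg, subMsg })` calls later in this series, the script presumably looks something like:

```ts
// Sketch only, assuming the store contract used by hypav3.ts in this series.
import { hypaV3ProgressStore } from "src/ts/stores.svelte";

let isExpanded = $state(false); // toggles collapsed badge <-> expanded panel

// Fields written by hypav3.ts:
//   open    — whether summarization/embedding work is pending
//   msg     — headline, e.g. "[HypaV3] Summarizing..."
//   subMsg  — detail, e.g. "3 queued"
//   miniMsg — short text for the collapsed badge
```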
+{:else} + +{/if} diff --git a/src/lib/Setting/Pages/OtherBotSettings.svelte b/src/lib/Setting/Pages/OtherBotSettings.svelte index 474eb59b..2698b6a9 100644 --- a/src/lib/Setting/Pages/OtherBotSettings.svelte +++ b/src/lib/Setting/Pages/OtherBotSettings.svelte @@ -3,15 +3,13 @@ import { language } from "src/lang"; import Help from "src/lib/Others/Help.svelte"; import { selectSingleFile } from "src/ts/util"; - - import { DBState } from 'src/ts/stores.svelte'; - import { isTauri, saveAsset } from "src/ts/globalApi.svelte"; + import { DBState, selectedCharID } from 'src/ts/stores.svelte'; + import { isTauri, saveAsset, downloadFile } from "src/ts/globalApi.svelte"; import NumberInput from "src/lib/UI/GUI/NumberInput.svelte"; import TextInput from "src/lib/UI/GUI/TextInput.svelte"; import SelectInput from "src/lib/UI/GUI/SelectInput.svelte"; import OptionInput from "src/lib/UI/GUI/OptionInput.svelte"; import SliderInput from "src/lib/UI/GUI/SliderInput.svelte"; - import Button from "src/lib/UI/GUI/Button.svelte"; import { getCharImage } from "src/ts/characters"; import Arcodion from "src/lib/UI/Arcodion.svelte"; import CheckInput from "src/lib/UI/GUI/CheckInput.svelte"; @@ -19,7 +17,9 @@ import { untrack } from "svelte"; import { tokenizePreset } from "src/ts/process/prompt"; import { getCharToken } from "src/ts/tokenizer"; - import { selectedCharID } from "src/ts/stores.svelte"; + import { PlusIcon, PencilIcon, TrashIcon, DownloadIcon, FolderUpIcon } from "lucide-svelte"; + import { alertError, alertInput, alertConfirm, alertNormal } from "src/ts/alert"; + import { createHypaV3Preset } from "src/ts/process/memory/hypav3"; $effect.pre(() => { DBState.db.NAIImgConfig ??= { @@ -67,25 +67,35 @@ // HypaV3 $effect(() => { - const newValue = Math.min(DBState.db.hypaV3Settings.recentMemoryRatio, 1); + const settings = DBState.db.hypaV3Presets?.[DBState.db.hypaV3PresetId]?.settings; + const currentValue = settings?.similarMemoryRatio; + + if (!currentValue) return; untrack(() => { - DBState.db.hypaV3Settings.recentMemoryRatio = newValue; - - if (newValue + DBState.db.hypaV3Settings.similarMemoryRatio > 1) { - DBState.db.hypaV3Settings.similarMemoryRatio = 1 - newValue; + const newValue = Math.min(currentValue, 1); + + settings.similarMemoryRatio = newValue; + + if (newValue + settings.recentMemoryRatio > 1) { + settings.recentMemoryRatio = 1 - newValue; } }) }); $effect(() => { - const newValue = Math.min(DBState.db.hypaV3Settings.similarMemoryRatio, 1); + const settings = DBState.db.hypaV3Presets?.[DBState.db.hypaV3PresetId]?.settings; + const currentValue = settings?.recentMemoryRatio; + + if (!currentValue) return; untrack(() => { - DBState.db.hypaV3Settings.similarMemoryRatio = newValue; + const newValue = Math.min(currentValue, 1); - if (newValue + DBState.db.hypaV3Settings.recentMemoryRatio > 1) { - DBState.db.hypaV3Settings.recentMemoryRatio = 1 - newValue; + settings.recentMemoryRatio = newValue; + + if (newValue + settings.similarMemoryRatio > 1) { + settings.similarMemoryRatio = 1 - newValue; } }) }); @@ -542,20 +552,11 @@ DBState.db.hanuraiEnable = false DBState.db.hypaV3 = false } else if (value === 'hypaV3') { - DBState.db.supaModelType = 'subModel' DBState.db.memoryAlgorithmType = 'hypaMemoryV3' - DBState.db.hypav2 = false + DBState.db.supaModelType = 'none' DBState.db.hanuraiEnable = false + DBState.db.hypav2 = false DBState.db.hypaV3 = true - DBState.db.hypaV3Settings.memoryTokensRatio = 0.2 - DBState.db.hypaV3Settings.extraSummarizationRatio = 0 - 
DBState.db.hypaV3Settings.maxChatsPerSummary = 4 - DBState.db.hypaV3Settings.recentMemoryRatio = 0.4 - DBState.db.hypaV3Settings.similarMemoryRatio = 0.4 - DBState.db.hypaV3Settings.enableSimilarityCorrection = false - DBState.db.hypaV3Settings.preserveOrphanedMemory = false - DBState.db.hypaV3Settings.processRegexScript = false - DBState.db.hypaV3Settings.doNotSummarizeUserMessage = false } else { DBState.db.supaModelType = 'none' DBState.db.memoryAlgorithmType = 'none' @@ -597,46 +598,191 @@ {language.hypaAllocatedTokens} {:else if DBState.db.hypaV3} - {language.hypaV3Settings.descriptionLabel} - {language.SuperMemory} {language.model} - - distilbart-cnn-6-6 (Free/Local) - {language.submodel} - - {language.summarizationPrompt} -
- -
- {#await getMaxMemoryRatio() then maxMemoryRatio} - {language.hypaV3Settings.maxMemoryTokensRatioLabel} - - {:catch error} - {language.hypaV3Settings.maxMemoryTokensRatioError} - {/await} - {language.hypaV3Settings.memoryTokensRatioLabel} - - {language.hypaV3Settings.extraSummarizationRatioLabel} - - {language.hypaV3Settings.maxChatsPerSummaryLabel} - - {language.hypaV3Settings.recentMemoryRatioLabel} - - {language.hypaV3Settings.similarMemoryRatioLabel} - - {language.hypaV3Settings.randomMemoryRatioLabel} - -
- -
-
- -
-
- -
-
- + {language.hypaV3Settings.descriptionLabel} + Preset + + +
+ + + + + + +
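The preset toolbar here (add, rename, delete, export, import) was stripped to bare `+` markers; its add action presumably calls `createHypaV3Preset`, imported at the top of this file. A plausible default factory, assuming the `HypaV3Settings` fields declared in `hypav3.ts` and reusing the defaults this patch removes from the old flat settings — the rate-limit values are illustrative guesses, not confirmed defaults:

```ts
import type { HypaV3Preset } from "src/ts/process/memory/hypav3";

// Sketch only: field set taken from hypav3.ts; numeric defaults echo the
// removed DBState.db.hypaV3Settings values; rate limits are illustrative.
function makeDefaultPreset(name: string): HypaV3Preset {
  return {
    name,
    settings: {
      summarizationModel: "subModel",
      summarizationPrompt: "",
      memoryTokensRatio: 0.2,
      extraSummarizationRatio: 0,
      maxChatsPerSummary: 4,
      recentMemoryRatio: 0.4,
      similarMemoryRatio: 0.4,
      enableSimilarityCorrection: false,
      preserveOrphanedMemory: false,
      processRegexScript: false,
      doNotSummarizeUserMessage: false,
      useExperimentalImpl: false,
      summarizationRequestsPerMinute: 10,
      summarizationMaxConcurrent: 1,
      embeddingRequestsPerMinute: 60,
      embeddingMaxConcurrent: 5,
    },
  };
}
```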
+ + + +
+ + {#if DBState.db.hypaV3Presets?.[DBState.db.hypaV3PresetId]?.settings} + {@const settings = DBState.db.hypaV3Presets[DBState.db.hypaV3PresetId].settings} + + {language.SuperMemory} {language.model} + + {language.submodel} + {#if "gpu" in navigator} + Qwen3 1.7B (GPU) + Qwen3 4B (GPU) + Qwen3 8B (GPU) + {/if} + + {language.summarizationPrompt} +
+ +
+ {#await getMaxMemoryRatio() then maxMemoryRatio} + {language.hypaV3Settings.maxMemoryTokensRatioLabel} + + {:catch error} + {language.hypaV3Settings.maxMemoryTokensRatioError} + {/await} + {language.hypaV3Settings.memoryTokensRatioLabel} + + {language.hypaV3Settings.extraSummarizationRatioLabel} + + {language.hypaV3Settings.maxChatsPerSummaryLabel} + + {language.hypaV3Settings.recentMemoryRatioLabel} + + {language.hypaV3Settings.similarMemoryRatioLabel} + + {language.hypaV3Settings.randomMemoryRatioLabel} + +
+ +
+
+ +
+
+ +
+ +
+ +
+ {#if settings.useExperimentalImpl} + Summarization Requests Per Minute + + Summarization Max Concurrent + + Embedding Requests Per Minute + + Embedding Max Concurrent + + {:else} +
+ +
+ {/if} +
+ {/if} + +
{:else if (DBState.db.supaModelType !== 'none' && DBState.db.hypav2 === false && DBState.db.hypaV3 === false)} {language.supaDesc} {language.SuperMemory} {language.model} @@ -663,14 +809,17 @@ {language.embedding} {#if 'gpu' in navigator} + MiniLM L6 v2 (GPU) Nomic Embed Text v1.5 (GPU) BGE Small English (GPU) BGE Medium 3 (GPU) + Multilingual MiniLM L12 v2 (GPU) {/if} MiniLM L6 v2 (CPU) Nomic Embed Text v1.5 (CPU) BGE Small English (CPU) BGE Medium 3 (CPU) + Multilingual MiniLM L12 v2 (CPU) OpenAI text-embedding-3-small OpenAI text-embedding-3-large OpenAI Ada diff --git a/src/lib/SideBars/CharConfig.svelte b/src/lib/SideBars/CharConfig.svelte index c4c90a6e..3fa58c3f 100644 --- a/src/lib/SideBars/CharConfig.svelte +++ b/src/lib/SideBars/CharConfig.svelte @@ -3,12 +3,12 @@ import { tokenizeAccurate } from "../../ts/tokenizer"; import { saveImage as saveAsset, type Database, type character, type groupChat } from "../../ts/storage/database.svelte"; import { DBState } from 'src/ts/stores.svelte'; - import { CharConfigSubMenu, MobileGUI, ShowRealmFrameStore, selectedCharID } from "../../ts/stores.svelte"; + import { CharConfigSubMenu, MobileGUI, ShowRealmFrameStore, selectedCharID, hypaV3ModalOpen } from "../../ts/stores.svelte"; import { PlusIcon, SmileIcon, TrashIcon, UserIcon, ActivityIcon, BookIcon, User, CurlyBraces, Volume2Icon, DownloadIcon, FolderUpIcon, Share2Icon } from 'lucide-svelte' import Check from "../UI/GUI/CheckInput.svelte"; import { addCharEmotion, addingEmotion, getCharImage, rmCharEmotion, selectCharImg, makeGroupImage, removeChar, changeCharImage } from "../../ts/characters"; import LoreBook from "./LoreBook/LoreBookSetting.svelte"; - import { alertConfirm, alertMd, alertNormal, alertSelectChar, alertTOS, showHypaV2Alert, showHypaV3Alert } from "../../ts/alert"; + import { alertConfirm, alertMd, alertNormal, alertSelectChar, alertTOS, showHypaV2Alert } from "../../ts/alert"; import BarIcon from "./BarIcon.svelte"; import { findCharacterbyId, getAuthorNoteDefaultText, parseKeyValue, selectMultipleFile, selectSingleFile } from "../../ts/util"; import { onDestroy } from "svelte"; @@ -1103,10 +1103,10 @@ > {language.hypaMemoryV2Modal} - {:else if DBState.db.supaModelType !== 'none' && DBState.db.hypaV3} + {:else if DBState.db.hypaV3}
{@render toggles(true)} - {#if DBState.db.supaModelType !== 'none' || DBState.db.hanuraiEnable} + {#if DBState.db.supaModelType !== 'none' || DBState.db.hanuraiEnable || DBState.db.hypaV3}
- +
{/if}
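Both render branches now gate the memory status UI on the same widened condition; spelled out as a reading aid only:

```ts
// The repeated guard above, as one expression (names from this diff):
const showMemoryStatus =
  DBState.db.supaModelType !== "none" ||
  DBState.db.hanuraiEnable ||
  DBState.db.hypaV3;
```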
@@ -64,9 +64,9 @@
{@render toggles()} - {#if DBState.db.supaModelType !== 'none' || DBState.db.hanuraiEnable} + {#if DBState.db.supaModelType !== 'none' || DBState.db.hanuraiEnable || DBState.db.hypaV3}
- +
{/if}
 {/if}
\ No newline at end of file
diff --git a/src/ts/alert.ts b/src/ts/alert.ts
index c7c96a88..746f6797 100644
--- a/src/ts/alert.ts
+++ b/src/ts/alert.ts
@@ -10,7 +10,7 @@ export interface alertData{
     type: 'error'|'normal'|'none'|'ask'|'wait'|'selectChar'
         |'input'|'toast'|'wait2'|'markdown'|'select'|'login'
         |'tos'|'cardexport'|'requestdata'|'addchar'|'hypaV2'|'selectModule'
-        |'chatOptions'|'pukmakkurit'|'branches'|'hypaV3'|'progress',
+        |'chatOptions'|'pukmakkurit'|'branches'|'progress',
     msg: string,
     submsg?: string
 }
@@ -319,10 +319,3 @@ export function showHypaV2Alert(){
         'msg': ""
     })
 }
-
-export function showHypaV3Alert(){
-    alertStoreImported.set({
-        'type': 'hypaV3',
-        'msg': ""
-    })
-}
\ No newline at end of file
diff --git a/src/ts/process/memory/hypamemory.ts b/src/ts/process/memory/hypamemory.ts
index 07b808fb..07af73d4 100644
--- a/src/ts/process/memory/hypamemory.ts
+++ b/src/ts/process/memory/hypamemory.ts
@@ -1,27 +1,33 @@
 import localforage from "localforage";
 import { globalFetch } from "src/ts/globalApi.svelte";
 import { runEmbedding } from "../transformers";
-import { alertError } from "src/ts/alert";
 import { appendLastPath } from "src/ts/util";
 import { getDatabase } from "src/ts/storage/database.svelte";
+export type HypaModel = 'custom'|'ada'|'openai3small'|'openai3large'|'MiniLM'|'MiniLMGPU'|'nomic'|'nomicGPU'|'bgeSmallEn'|'bgeSmallEnGPU'|'bgem3'|'bgem3GPU'|'multiMiniLM'|'multiMiniLMGPU'
 
-export type HypaModel = 'ada'|'MiniLM'|'nomic'|'custom'|'nomicGPU'|'bgeSmallEn'|'bgeSmallEnGPU'|'bgem3'|'bgem3GPU'|'openai3small'|'openai3large'
-
-const localModels = {
+// In a typical environment, bge-m3 is a heavy model.
+// If your GPU can't handle this model, you'll see the error below:
+// Failed to execute 'mapAsync' on 'GPUBuffer': [Device] is lost
+export const localModels = {
     models: {
         'MiniLM':'Xenova/all-MiniLM-L6-v2',
+        'MiniLMGPU': "Xenova/all-MiniLM-L6-v2",
         'nomic':'nomic-ai/nomic-embed-text-v1.5',
         'nomicGPU':'nomic-ai/nomic-embed-text-v1.5',
-        'bgeSmallEn': 'BAAI/bge-small-en-v1.5',
-        'bgeSmallEnGPU': 'BAAI/bge-small-en-v1.5',
-        'bgem3': 'BAAI/bge-m3',
-        'bgem3GPU': 'BAAI/bge-m3',
+        'bgeSmallEn': 'Xenova/bge-small-en-v1.5',
+        'bgeSmallEnGPU': 'Xenova/bge-small-en-v1.5',
+        'bgem3': 'Xenova/bge-m3',
+        'bgem3GPU': 'Xenova/bge-m3',
+        'multiMiniLM': 'Xenova/paraphrase-multilingual-MiniLM-L12-v2',
+        'multiMiniLMGPU': 'Xenova/paraphrase-multilingual-MiniLM-L12-v2',
     },
     gpuModels:[
+        'MiniLMGPU',
         'nomicGPU',
         'bgeSmallEnGPU',
-        'bgem3GPU'
+        'bgem3GPU',
+        'multiMiniLMGPU',
    ]
 }
 
@@ -44,7 +50,7 @@
         else{
             this.model = model
         }
-        this.customEmbeddingUrl = customEmbeddingUrl || db.hypaCustomSettings.url
+        this.customEmbeddingUrl = customEmbeddingUrl?.trim() || db.hypaCustomSettings?.url?.trim() || ""
     }
 
     async embedDocuments(texts: string[]): Promise {
@@ -80,10 +86,12 @@
         const db = getDatabase()
 
         const fetchArgs = {
-            ...(db.hypaCustomSettings.key ? {headers: {"Authorization": "Bearer " + db.hypaCustomSettings.key}} : {}),
+            headers: {
+                ...(db.hypaCustomSettings?.key?.trim() ? {"Authorization": "Bearer " + db.hypaCustomSettings.key.trim()} : {})
+            },
             body: {
                 "input": input,
-                ...(db.hypaCustomSettings.model ? {"model": db.hypaCustomSettings.model} : {})
+                ...(db.hypaCustomSettings?.model?.trim() ?
{"model": db.hypaCustomSettings.model.trim()} : {}) } }; @@ -99,7 +107,7 @@ export class HypaProcesser{ gf = await globalFetch("https://api.openai.com/v1/embeddings", { headers: { - "Authorization": "Bearer " + db.supaMemoryKey || this.oaikey + "Authorization": "Bearer " + (this.oaikey?.trim() || db.supaMemoryKey?.trim()) }, body: { "input": input, @@ -134,7 +142,7 @@ export class HypaProcesser{ async addText(texts:string[]) { const db = getDatabase() - const suffix = (this.model === 'custom' && db.hypaCustomSettings.model) ? `-${db.hypaCustomSettings.model}` : "" + const suffix = (this.model === 'custom' && db.hypaCustomSettings?.model?.trim()) ? `-${db.hypaCustomSettings.model.trim()}` : "" for(let i=0;i(arr: T[], chunkSize: number) => const chunk = chunks[chunkIndex] || []; chunks[chunkIndex] = chunk.concat([elem]); return chunks; -}, [] as T[][]); \ No newline at end of file +}, [] as T[][]); diff --git a/src/ts/process/memory/hypamemoryv2.ts b/src/ts/process/memory/hypamemoryv2.ts new file mode 100644 index 00000000..14e1a08a --- /dev/null +++ b/src/ts/process/memory/hypamemoryv2.ts @@ -0,0 +1,414 @@ +import localforage from "localforage"; +import { type HypaModel, localModels } from "./hypamemory"; +import { TaskRateLimiter, TaskCanceledError } from "./taskRateLimiter"; +import { runEmbedding } from "../transformers"; +import { globalFetch } from "src/ts/globalApi.svelte"; +import { getDatabase } from "src/ts/storage/database.svelte"; +import { appendLastPath } from "src/ts/util"; + +export interface HypaProcessorV2Options { + model?: HypaModel; + customEmbeddingUrl?: string; + oaiKey?: string; + rateLimiter?: TaskRateLimiter; +} + +export interface EmbeddingText { + content: string; + metadata?: TMetadata; +} + +export interface EmbeddingResult extends EmbeddingText { + embedding: EmbeddingVector; +} + +export type EmbeddingVector = number[] | Float32Array; + +export class HypaProcessorV2 { + private static readonly LOG_PREFIX = "[HypaProcessorV2]"; + public readonly options: HypaProcessorV2Options; + public progressCallback: (queuedCount: number) => void = null; + private vectors: Map> = new Map(); + private forage: LocalForage = localforage.createInstance({ + name: "hypaVector", + }); + + public constructor(options?: HypaProcessorV2Options) { + const db = getDatabase(); + + this.options = { + model: db.hypaModel || "MiniLM", + customEmbeddingUrl: db.hypaCustomSettings?.url?.trim() || "", + oaiKey: db.supaMemoryKey?.trim() || "", + rateLimiter: new TaskRateLimiter(), + ...options, + }; + } + + public async addTexts(ebdTexts: EmbeddingText[]): Promise { + await this.getEmbeds(ebdTexts, true); + } + + public async similaritySearchScored( + query: string + ): Promise<[EmbeddingResult, number][]> { + const results = await this.similaritySearchScoredBatch([query]); + return results[0]; + } + + public async similaritySearchScoredBatch( + queries: string[] + ): Promise<[EmbeddingResult, number][][]> { + if (queries.length === 0) { + return []; + } + + // Remove duplicate queries + const uniqueQueries = [...new Set(queries)]; + + // Convert queries to EmbeddingText array + const ebdTexts: EmbeddingText[] = uniqueQueries.map((query) => ({ + content: query, + })); + + // Get query embeddings (don't save to memory) + const ebdResults = await this.getEmbeds(ebdTexts, false); + + const scoredResultsMap = new Map< + string, + [EmbeddingResult, number][] + >(); + + // Calculate similarity for each unique query + for (let i = 0; i < uniqueQueries.length; i++) { + const ebdResult = ebdResults[i]; + 
+ const scoredVectors = Array.from(this.vectors.values()) + .map((vector): [EmbeddingResult, number] => [ + vector, + this.similarity(ebdResult.embedding, vector.embedding), + ]) + .sort((a, b) => b[1] - a[1]); + + scoredResultsMap.set(uniqueQueries[i], scoredVectors); + } + + return queries.map((query) => scoredResultsMap.get(query)); + } + + private async getEmbeds( + ebdTexts: EmbeddingText[], + saveToMemory: boolean = true + ): Promise[]> { + if (ebdTexts.length === 0) { + return []; + } + + const resultMap: Map> = new Map(); + const toEmbed: EmbeddingText[] = []; + + // Load cache + const loadPromises = ebdTexts.map(async (item, index) => { + const { content, metadata } = item; + + // Use if already in memory + if (this.vectors.has(content)) { + resultMap.set(content, this.vectors.get(content)); + return; + } + + try { + const cached = await this.forage.getItem>( + this.getCacheKey(content) + ); + + if (cached) { + // Debug log for cache hit + console.debug( + HypaProcessorV2.LOG_PREFIX, + `Cache hit for getting embedding ${index} with model ${this.options.model}` + ); + + // Add metadata + cached.metadata = metadata; + + // Save to memory + if (saveToMemory) { + this.vectors.set(content, cached); + } + + resultMap.set(content, cached); + } else { + toEmbed.push(item); + } + } catch (error) { + toEmbed.push(item); + } + }); + + await Promise.all(loadPromises); + + if (toEmbed.length === 0) { + return ebdTexts.map((item) => resultMap.get(item.content)); + } + + // Chunking array + const chunkSize = await this.getOptimalChunkSize(); + + // Debug log for optimal chunk size + console.debug( + HypaProcessorV2.LOG_PREFIX, + `Optimal chunk size for ${this.options.model}: ${chunkSize}` + ); + + const chunks = this.chunkArray(toEmbed, chunkSize); + + if (this.isLocalModel()) { + // Local model: Sequential processing + for (let i = 0; i < chunks.length; i++) { + // Progress callback + this.progressCallback?.(chunks.length - i - 1); + + const chunk = chunks[i]; + const embeddings = await this.getLocalEmbeds( + chunk.map((item) => item.content) + ); + + const savePromises = embeddings.map(async (embedding, j) => { + const { content, metadata } = chunk[j]; + + const ebdResult: EmbeddingResult = { + content, + embedding, + metadata, + }; + + // Save to DB + await this.forage.setItem(this.getCacheKey(content), { + content, + embedding, + }); + + // Save to memory + if (saveToMemory) { + this.vectors.set(content, ebdResult); + } + + resultMap.set(content, ebdResult); + }); + + await Promise.all(savePromises); + } + } else { + // API model: Parallel processing + const embeddingTasks = chunks.map((chunk) => { + const contents = chunk.map((item) => item.content); + + return () => this.getAPIEmbeds(contents); + }); + + // Progress callback + this.options.rateLimiter.taskQueueChangeCallback = this.progressCallback; + + const batchResult = await this.options.rateLimiter.executeBatch< + EmbeddingVector[] + >(embeddingTasks); + const errors: Error[] = []; + + const chunksSavePromises = batchResult.results.map(async (result, i) => { + if (!result.success) { + errors.push(result.error); + return; + } + + if (!result.data) { + errors.push(new Error("No embeddings found in the response.")); + return; + } + + const chunk = chunks[i]; + const savePromises = result.data.map(async (embedding, j) => { + const { content, metadata } = chunk[j]; + + const ebdResult: EmbeddingResult = { + content, + embedding, + metadata, + }; + + // Save to DB + await this.forage.setItem(this.getCacheKey(content), { + content, + 
embedding, + }); + + // Save to memory + if (saveToMemory) { + this.vectors.set(content, ebdResult); + } + + resultMap.set(content, ebdResult); + }); + + await Promise.all(savePromises); + }); + + await Promise.all(chunksSavePromises); + + // Throw major error if there are errors + if (errors.length > 0) { + const majorError = + errors.find((error) => !(error instanceof TaskCanceledError)) || + errors[0]; + + throw majorError; + } + } + + return ebdTexts.map((item) => resultMap.get(item.content)); + } + + private similarity(a: EmbeddingVector, b: EmbeddingVector): number { + let dot = 0; + let magA = 0; + let magB = 0; + + for (let i = 0; i < a.length; i++) { + dot += a[i] * b[i]; + magA += a[i] * a[i]; + magB += b[i] * b[i]; + } + + return dot / (Math.sqrt(magA) * Math.sqrt(magB)); + } + + private getCacheKey(content: string): string { + const db = getDatabase(); + const suffix = + this.options.model === "custom" && db.hypaCustomSettings?.model?.trim() + ? `-${db.hypaCustomSettings.model.trim()}` + : ""; + + return `${content}|${this.options.model}${suffix}`; + } + + private async getOptimalChunkSize(): Promise { + // API + if (!this.isLocalModel()) { + return 50; + } + + const isMobile = /Android|iPhone|iPad|iPod|webOS/i.test( + navigator.userAgent + ); + + // WebGPU + if ("gpu" in navigator) { + return isMobile ? 5 : 10; + } + + // WASM + const cpuCores = navigator.hardwareConcurrency || 4; + const baseChunkSize = isMobile ? Math.floor(cpuCores / 2) : cpuCores; + + return Math.min(baseChunkSize, 10); + } + + private isLocalModel(): boolean { + return Object.keys(localModels.models).includes(this.options.model); + } + + private chunkArray(array: T[], size: number): T[][] { + const chunks: T[][] = []; + + for (let i = 0; i < array.length; i += size) { + chunks.push(array.slice(i, i + size)); + } + + return chunks; + } + + private async getLocalEmbeds(contents: string[]): Promise { + const results: Float32Array[] = await runEmbedding( + contents, + localModels.models[this.options.model], + localModels.gpuModels.includes(this.options.model) ? "webgpu" : "wasm" + ); + + return results; + } + + private async getAPIEmbeds(contents: string[]): Promise { + const db = getDatabase(); + let response = null; + + if (this.options.model === "custom") { + if (!this.options.customEmbeddingUrl) { + throw new Error("Custom model requires a Custom Server URL"); + } + + const replaceUrl = this.options.customEmbeddingUrl.endsWith("/embeddings") + ? this.options.customEmbeddingUrl + : appendLastPath(this.options.customEmbeddingUrl, "embeddings"); + + const fetchArgs = { + headers: { + ...(db.hypaCustomSettings?.key?.trim() + ? { Authorization: "Bearer " + db.hypaCustomSettings.key.trim() } + : {}), + }, + body: { + input: contents, + ...(db.hypaCustomSettings?.model?.trim() + ? 
{ model: db.hypaCustomSettings.model.trim() } + : {}), + }, + }; + + response = await globalFetch(replaceUrl, fetchArgs); + } else if ( + ["ada", "openai3small", "openai3large"].includes(this.options.model) + ) { + const models = { + ada: "text-embedding-ada-002", + openai3small: "text-embedding-3-small", + openai3large: "text-embedding-3-large", + }; + + const fetchArgs = { + headers: { + Authorization: + "Bearer " + + (this.options.oaiKey?.trim() || db.supaMemoryKey?.trim()), + }, + body: { + input: contents, + model: models[this.options.model], + }, + }; + + response = await globalFetch( + "https://api.openai.com/v1/embeddings", + fetchArgs + ); + } else { + throw new Error(`Unsupported model: ${this.options.model}`); + } + + if (!response.ok || !response.data.data) { + throw new Error(JSON.stringify(response.data)); + } + + const embeddings: EmbeddingVector[] = response.data.data.map( + (item: { embedding: EmbeddingVector }) => { + if (!item.embedding) { + throw new Error("No embeddings found in the response."); + } + + return item.embedding; + } + ); + + return embeddings; + } +} diff --git a/src/ts/process/memory/hypav3.ts b/src/ts/process/memory/hypav3.ts index f1e9f8a1..7985cf53 100644 --- a/src/ts/process/memory/hypav3.ts +++ b/src/ts/process/memory/hypav3.ts @@ -1,8 +1,6 @@ -import { - type VectorArray, - type memoryVector, - HypaProcesser, -} from "./hypamemory"; +import { type memoryVector, HypaProcesser, similarity } from "./hypamemory"; +import { TaskRateLimiter } from "./taskRateLimiter"; +import { type EmbeddingText, HypaProcessorV2 } from "./hypamemoryv2"; import { type Chat, type character, @@ -11,15 +9,34 @@ import { } from "src/ts/storage/database.svelte"; import { type OpenAIChat } from "../index.svelte"; import { requestChatData } from "../request"; -import { runSummarizer } from "../transformers"; -import { globalFetch } from "src/ts/globalApi.svelte"; +import { chatCompletion, unloadEngine } from "../webllm"; import { parseChatML } from "src/ts/parser.svelte"; +import { hypaV3ProgressStore } from "src/ts/stores.svelte"; import { type ChatTokenizer } from "src/ts/tokenizer"; -interface Summary { - text: string; - chatMemos: Set; - isImportant: boolean; +export interface HypaV3Preset { + name: string; + settings: HypaV3Settings; +} + +export interface HypaV3Settings { + summarizationModel: string; + summarizationPrompt: string; + memoryTokensRatio: number; + extraSummarizationRatio: number; + maxChatsPerSummary: number; + recentMemoryRatio: number; + similarMemoryRatio: number; + enableSimilarityCorrection: boolean; + preserveOrphanedMemory: boolean; + processRegexScript: boolean; + doNotSummarizeUserMessage: boolean; + // Experimental + useExperimentalImpl: boolean; + summarizationRequestsPerMinute: number; + summarizationMaxConcurrent: number; + embeddingRequestsPerMinute: number; + embeddingMaxConcurrent: number; } interface HypaV3Data { @@ -36,228 +53,29 @@ export interface SerializableHypaV3Data { lastSelectedSummaries?: number[]; } +interface Summary { + text: string; + chatMemos: Set; + isImportant: boolean; +} + interface SummaryChunk { text: string; summary: Summary; } +export interface HypaV3Result { + currentTokens: number; + chats: OpenAIChat[]; + error?: string; + memory?: SerializableHypaV3Data; +} + +const logPrefix = "[HypaV3]"; +const memoryPromptTag = "Past Events Summary"; const minChatsForSimilarity = 3; -const maxSummarizationFailures = 3; const summarySeparator = "\n\n"; -// Helper function to check if one Set is a subset of another -function 
isSubset(subset: Set, superset: Set): boolean { - for (const elem of subset) { - if (!superset.has(elem)) { - return false; - } - } - return true; -} - -function toSerializableHypaV3Data(data: HypaV3Data): SerializableHypaV3Data { - return { - ...data, - summaries: data.summaries.map((summary) => ({ - ...summary, - chatMemos: [...summary.chatMemos], - })), - }; -} - -function toHypaV3Data(serialData: SerializableHypaV3Data): HypaV3Data { - return { - ...serialData, - summaries: serialData.summaries.map((summary) => ({ - ...summary, - // Convert null back to undefined (JSON serialization converts undefined to null) - chatMemos: new Set( - summary.chatMemos.map((memo) => (memo === null ? undefined : memo)) - ), - })), - }; -} - -function encapsulateMemoryPrompt(memoryPrompt: string): string { - return `${memoryPrompt}`; -} - -function cleanOrphanedSummary(chats: OpenAIChat[], data: HypaV3Data): void { - // Collect all memos from current chats - const currentChatMemos = new Set(chats.map((chat) => chat.memo)); - const originalLength = data.summaries.length; - - // Filter summaries - keep only those whose chatMemos are subset of current chat memos - data.summaries = data.summaries.filter((summary) => { - return isSubset(summary.chatMemos, currentChatMemos); - }); - - const removedCount = originalLength - data.summaries.length; - - if (removedCount > 0) { - console.log(`[HypaV3] Cleaned ${removedCount} orphaned summaries.`); - } -} - -export async function summarize( - oaiChats: OpenAIChat[] -): Promise<{ success: boolean; data: string }> { - const db = getDatabase(); - const stringifiedChats = oaiChats - .map((chat) => `${chat.role}: ${chat.content}`) - .join("\n"); - - if (db.supaModelType === "distilbart") { - try { - const summaryText = (await runSummarizer(stringifiedChats)).trim(); - return { success: true, data: summaryText }; - } catch (error) { - return { - success: false, - data: error, - }; - } - } - - const summarizePrompt = - db.supaMemoryPrompt === "" - ? "[Summarize the ongoing role story, It must also remove redundancy and unnecessary text and content from the output.]" - : db.supaMemoryPrompt; - - switch (db.supaModelType) { - case "instruct35": { - console.log( - "[HypaV3] Using openAI gpt-3.5-turbo-instruct for summarization." - ); - - const requestPrompt = `${stringifiedChats}\n\n${summarizePrompt}\n\nOutput:`; - const response = await globalFetch( - "https://api.openai.com/v1/completions", - { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: "Bearer " + db.supaMemoryKey, - }, - body: { - model: "gpt-3.5-turbo-instruct", - prompt: requestPrompt, - max_tokens: db.maxResponse, - temperature: 0, - }, - } - ); - - try { - if (!response.ok) { - return { - success: false, - data: JSON.stringify(response), - }; - } - - const summaryText = - response.data?.choices?.[0]?.message?.content?.trim(); - - if (!summaryText) { - return { - success: false, - data: JSON.stringify(response), - }; - } - - return { success: true, data: summaryText }; - } catch (error) { - return { - success: false, - data: error, - }; - } - } - - case "subModel": { - console.log(`[HypaV3] Using ax model ${db.subModel} for summarization.`); - - const requestMessages: OpenAIChat[] = parseChatML( - summarizePrompt.replaceAll("{{slot}}", stringifiedChats) - ) ?? 
[ - { - role: "user", - content: stringifiedChats, - }, - { - role: "system", - content: summarizePrompt, - }, - ]; - - const response = await requestChatData( - { - formated: requestMessages, - bias: {}, - useStreaming: false, - noMultiGen: true, - }, - "memory" - ); - - if (response.type === "streaming" || response.type === "multiline") { - return { - success: false, - data: "unexpected response type", - }; - } - - if (response.type === "fail") { - return { - success: false, - data: response.result, - }; - } - - return { success: true, data: response.result.trim() }; - } - - default: { - return { - success: false, - data: `unsupported model ${db.supaModelType} for summarization`, - }; - } - } -} - -async function retryableSummarize( - oaiChats: OpenAIChat[] -): Promise<{ success: boolean; data: string }> { - let summarizationFailures = 0; - - while (summarizationFailures < maxSummarizationFailures) { - console.log( - "[HypaV3] Attempting summarization:", - "\nAttempt:", - summarizationFailures + 1, - "\nTarget:", - oaiChats - ); - - const summarizeResult = await summarize(oaiChats); - - if (!summarizeResult.success) { - console.log("[HypaV3] Summarization failed:", summarizeResult.data); - summarizationFailures++; - - if (summarizationFailures >= maxSummarizationFailures) { - return summarizeResult; - } - - continue; - } - - return summarizeResult; - } -} - export async function hypaMemoryV3( chats: OpenAIChat[], currentTokens: number, @@ -265,24 +83,74 @@ export async function hypaMemoryV3( room: Chat, char: character | groupChat, tokenizer: ChatTokenizer -): Promise<{ - currentTokens: number; - chats: OpenAIChat[]; - error?: string; - memory?: SerializableHypaV3Data; -}> { +): Promise { + const settings = getCurrentHypaV3Preset().settings; + + try { + if (settings.useExperimentalImpl) { + console.log(logPrefix, "Using experimental implementation."); + + return await hypaMemoryV3MainExp( + chats, + currentTokens, + maxContextTokens, + room, + char, + tokenizer + ); + } + + return await hypaMemoryV3Main( + chats, + currentTokens, + maxContextTokens, + room, + char, + tokenizer + ); + } catch (error) { + if (error instanceof Error) { + // Standard Error instance + error.message = `${logPrefix} ${error.message}`; + throw error; + } + + // Fallback for non-Error object + let errorMessage: string; + + try { + errorMessage = JSON.stringify(error); + } catch { + errorMessage = String(error); + } + + throw new Error(`${logPrefix} ${errorMessage}`); + } finally { + if (settings.summarizationModel !== "subModel") { + try { + unloadEngine(); + } catch {} + } + } +} + +async function hypaMemoryV3MainExp( + chats: OpenAIChat[], + currentTokens: number, + maxContextTokens: number, + room: Chat, + char: character | groupChat, + tokenizer: ChatTokenizer +): Promise { const db = getDatabase(); + const settings = getCurrentHypaV3Preset().settings; // Validate settings - if ( - db.hypaV3Settings.recentMemoryRatio + db.hypaV3Settings.similarMemoryRatio > - 1 - ) { + if (settings.recentMemoryRatio + settings.similarMemoryRatio > 1) { return { currentTokens, chats, - error: - "[HypaV3] The sum of Recent Memory Ratio and Similar Memory Ratio is greater than 1.", + error: `${logPrefix} The sum of Recent Memory Ratio and Similar Memory Ratio is greater than 1.`, }; } @@ -300,7 +168,7 @@ export async function hypaMemoryV3( } // Clean orphaned summaries - if (!db.hypaV3Settings.preserveOrphanedMemory) { + if (!settings.preserveOrphanedMemory) { cleanOrphanedSummary(chats, data); } @@ -324,33 +192,32 @@ export 
async function hypaMemoryV3( } } + console.log(logPrefix, "Starting index:", startIdx); + // Reserve memory tokens const emptyMemoryTokens = await tokenizer.tokenizeChat({ role: "system", - content: encapsulateMemoryPrompt(""), + content: wrapWithXml(memoryPromptTag, ""), }); const memoryTokens = Math.floor( - maxContextTokens * db.hypaV3Settings.memoryTokensRatio + maxContextTokens * settings.memoryTokensRatio ); - const shouldReserveEmptyMemoryTokens = - data.summaries.length === 0 && - currentTokens + emptyMemoryTokens <= maxContextTokens; - let availableMemoryTokens = shouldReserveEmptyMemoryTokens - ? 0 - : memoryTokens - emptyMemoryTokens; + const shouldReserveMemoryTokens = + data.summaries.length > 0 || currentTokens > maxContextTokens; + let availableMemoryTokens = shouldReserveMemoryTokens + ? memoryTokens - emptyMemoryTokens + : 0; - if (shouldReserveEmptyMemoryTokens) { - currentTokens += emptyMemoryTokens; - console.log("[HypaV3] Reserved empty memory tokens:", emptyMemoryTokens); - } else { + if (shouldReserveMemoryTokens) { currentTokens += memoryTokens; - console.log("[HypaV3] Reserved max memory tokens:", memoryTokens); + console.log(logPrefix, "Reserved memory tokens:", memoryTokens); } // If summarization is needed - let summarizationMode = currentTokens > maxContextTokens; + const summarizationMode = currentTokens > maxContextTokens; const targetTokens = - maxContextTokens * (1 - db.hypaV3Settings.extraSummarizationRatio); + maxContextTokens * (1 - settings.extraSummarizationRatio); + const toSummarizeArray: OpenAIChat[][] = []; while (summarizationMode) { if (currentTokens <= targetTokens) { @@ -364,43 +231,41 @@ export async function hypaMemoryV3( return { currentTokens, chats, - error: `[HypaV3] Cannot summarize further: input token count (${currentTokens}) exceeds max context size (${maxContextTokens}), but minimum ${minChatsForSimilarity} messages required.`, + error: `${logPrefix} Cannot summarize further: input token count (${currentTokens}) exceeds max context size (${maxContextTokens}), but minimum ${minChatsForSimilarity} messages required.`, memory: toSerializableHypaV3Data(data), }; } } const toSummarize: OpenAIChat[] = []; - const endIdx = Math.min( - startIdx + db.hypaV3Settings.maxChatsPerSummary, - chats.length - minChatsForSimilarity - ); let toSummarizeTokens = 0; + let currentIndex = startIdx; console.log( - "[HypaV3] Evaluating summarization batch:", + logPrefix, + "Evaluating summarization batch:", "\nCurrent Tokens:", currentTokens, "\nMax Context Tokens:", maxContextTokens, "\nStart Index:", startIdx, - "\nEnd Index:", - endIdx, - "\nChat Count:", - endIdx - startIdx, "\nMax Chats Per Summary:", - db.hypaV3Settings.maxChatsPerSummary + settings.maxChatsPerSummary ); - for (let i = startIdx; i < endIdx; i++) { - const chat = chats[i]; + while ( + toSummarize.length < settings.maxChatsPerSummary && + currentIndex < chats.length - minChatsForSimilarity + ) { + const chat = chats[currentIndex]; const chatTokens = await tokenizer.tokenizeChat(chat); console.log( - "[HypaV3] Evaluating chat:", + logPrefix, + "Evaluating chat:", "\nIndex:", - i, + currentIndex, "\nRole:", chat.role, "\nContent:", @@ -411,23 +276,40 @@ export async function hypaMemoryV3( toSummarizeTokens += chatTokens; - if (i === 0 || !chat.content.trim()) { + let shouldSummarize = true; + + if ( + chat.name === "example_user" || + chat.name === "example_assistant" || + chat.memo === "NewChatExample" + ) { console.log( - `[HypaV3] Skipping ${ - i === 0 ? 
"[Start a new chat]" : "empty content" - } at index ${i}` + logPrefix, + `Skipping example chat at index ${currentIndex}` ); - - continue; + shouldSummarize = false; } - if (db.hypaV3Settings.doNotSummarizeUserMessage && chat.role === "user") { - console.log(`[HypaV3] Skipping user role at index ${i}`); - - continue; + if (chat.memo === "NewChat") { + console.log(logPrefix, `Skipping new chat at index ${currentIndex}`); + shouldSummarize = false; } - toSummarize.push(chat); + if (chat.content.trim().length === 0) { + console.log(logPrefix, `Skipping empty chat at index ${currentIndex}`); + shouldSummarize = false; + } + + if (settings.doNotSummarizeUserMessage && chat.role === "user") { + console.log(logPrefix, `Skipping user role at index ${currentIndex}`); + shouldSummarize = false; + } + + if (shouldSummarize) { + toSummarize.push(chat); + } + + currentIndex++; } // Stop summarization if further reduction would go below target tokens (unless we're over max tokens) @@ -436,43 +318,119 @@ export async function hypaMemoryV3( currentTokens - toSummarizeTokens < targetTokens ) { console.log( - `[HypaV3] Stopping summarization: currentTokens(${currentTokens}) - toSummarizeTokens(${toSummarizeTokens}) < targetTokens(${targetTokens})` + logPrefix, + "Stopping summarization:", + `\ncurrentTokens(${currentTokens}) - toSummarizeTokens(${toSummarizeTokens}) < targetTokens(${targetTokens})` ); break; } - // Attempt summarization + // Collect summarization batch if (toSummarize.length > 0) { - const summarizeResult = await retryableSummarize(toSummarize); + console.log( + logPrefix, + "Collecting summarization batch:", + "\nTarget:", + toSummarize + ); + + toSummarizeArray.push([...toSummarize]); + } + + currentTokens -= toSummarizeTokens; + startIdx = currentIndex; + } + + // Process all collected summarization tasks + if (toSummarizeArray.length > 0) { + // Initialize rate limiter + // Local model must be processed sequentially + const rateLimiter = new TaskRateLimiter({ + tasksPerMinute: + settings.summarizationModel === "subModel" + ? settings.summarizationRequestsPerMinute + : 1000, + maxConcurrentTasks: + settings.summarizationModel === "subModel" + ? settings.summarizationMaxConcurrent + : 1, + }); + + rateLimiter.taskQueueChangeCallback = (queuedCount) => { + hypaV3ProgressStore.set({ + open: true, + miniMsg: `${rateLimiter.queuedTaskCount}`, + msg: `${logPrefix} Summarizing...`, + subMsg: `${rateLimiter.queuedTaskCount} queued`, + }); + }; + + const summarizationTasks = toSummarizeArray.map( + (item) => () => summarize(item) + ); + + // Start of performance measurement: summarize + console.log( + logPrefix, + `Starting ${toSummarizeArray.length} summarization.` + ); + const summarizeStartTime = performance.now(); + + const batchResult = await rateLimiter.executeBatch( + summarizationTasks + ); + + const summarizeEndTime = performance.now(); + console.debug( + `${logPrefix} summarization completed in ${ + summarizeEndTime - summarizeStartTime + }ms` + ); + // End of performance measurement: summarize + + hypaV3ProgressStore.set({ + open: false, + miniMsg: "", + msg: "", + subMsg: "", + }); + + // Note: + // We can't save some successful summaries to the DB temporarily + // because don't know the actual summarization model name. + // It is possible that the user can change the summarization model. 
+ for (let i = 0; i < batchResult.results.length; i++) { + const result = batchResult.results[i]; + + // Push consecutive successes + if (!result.success || !result.data) { + const errorMessage = !result.success + ? result.error + : "Empty summary returned"; + + console.log(logPrefix, "Summarization failed:", `\n${errorMessage}`); - if ( - !summarizeResult.success || - !summarizeResult.data || - summarizeResult.data.trim().length === 0 - ) { return { currentTokens, chats, - error: `[HypaV3] Summarization failed after maximum retries: ${summarizeResult.data}`, + error: `${logPrefix} Summarization failed: ${errorMessage}`, memory: toSerializableHypaV3Data(data), }; } + const summaryText = result.data; + data.summaries.push({ - text: summarizeResult.data, - chatMemos: new Set(toSummarize.map((chat) => chat.memo)), + text: summaryText, + chatMemos: new Set(toSummarizeArray[i].map((chat) => chat.memo)), isImportant: false, }); } - - currentTokens -= toSummarizeTokens; - startIdx = endIdx; } console.log( - `[HypaV3] ${ - summarizationMode ? "Completed" : "Skipped" - } summarization phase:`, + logPrefix, + `${summarizationMode ? "Completed" : "Skipped"} summarization phase:`, "\nCurrent Tokens:", currentTokens, "\nMax Context Tokens:", @@ -483,20 +441,11 @@ export async function hypaMemoryV3( // Early return if no summaries if (data.summaries.length === 0) { - // Generate final memory prompt - const memory = encapsulateMemoryPrompt(""); - - const newChats: OpenAIChat[] = [ - { - role: "system", - content: memory, - memo: "supaMemory", - }, - ...chats.slice(startIdx), - ]; + const newChats: OpenAIChat[] = chats.slice(startIdx); console.log( - "[HypaV3] Exiting function:", + logPrefix, + "Exiting function:", "\nCurrent Tokens:", currentTokens, "\nAll chats, including memory prompt:", @@ -514,49 +463,50 @@ export async function hypaMemoryV3( const selectedSummaries: Summary[] = []; const randomMemoryRatio = - 1 - - db.hypaV3Settings.recentMemoryRatio - - db.hypaV3Settings.similarMemoryRatio; + 1 - settings.recentMemoryRatio - settings.similarMemoryRatio; // Select important summaries - const selectedImportantSummaries: Summary[] = []; + { + const selectedImportantSummaries: Summary[] = []; - for (const summary of data.summaries) { - if (summary.isImportant) { - const summaryTokens = await tokenizer.tokenizeChat({ - role: "system", - content: summary.text + summarySeparator, - }); + for (const summary of data.summaries) { + if (summary.isImportant) { + const summaryTokens = await tokenizer.tokenizeChat({ + role: "system", + content: summary.text + summarySeparator, + }); - if (summaryTokens > availableMemoryTokens) { - break; + if (summaryTokens > availableMemoryTokens) { + break; + } + + selectedImportantSummaries.push(summary); + + availableMemoryTokens -= summaryTokens; } - - selectedImportantSummaries.push(summary); - - availableMemoryTokens -= summaryTokens; } + + selectedSummaries.push(...selectedImportantSummaries); + + console.log( + logPrefix, + "After important memory selection:", + "\nSummary Count:", + selectedImportantSummaries.length, + "\nSummaries:", + selectedImportantSummaries, + "\nAvailable Memory Tokens:", + availableMemoryTokens + ); } - selectedSummaries.push(...selectedImportantSummaries); - - console.log( - "[HypaV3] After important memory selection:", - "\nSummary Count:", - selectedImportantSummaries.length, - "\nSummaries:", - selectedImportantSummaries, - "\nAvailable Memory Tokens:", - availableMemoryTokens - ); - // Select recent summaries const 
reservedRecentMemoryTokens = Math.floor( - availableMemoryTokens * db.hypaV3Settings.recentMemoryRatio + availableMemoryTokens * settings.recentMemoryRatio ); let consumedRecentMemoryTokens = 0; - if (db.hypaV3Settings.recentMemoryRatio > 0) { + if (settings.recentMemoryRatio > 0) { const selectedRecentSummaries: Summary[] = []; // Target only summaries that haven't been selected yet @@ -586,7 +536,8 @@ export async function hypaMemoryV3( selectedSummaries.push(...selectedRecentSummaries); console.log( - "[HypaV3] After recent memory selection:", + logPrefix, + "After recent memory selection:", "\nSummary Count:", selectedRecentSummaries.length, "\nSummaries:", @@ -600,11 +551,11 @@ export async function hypaMemoryV3( // Select similar summaries let reservedSimilarMemoryTokens = Math.floor( - availableMemoryTokens * db.hypaV3Settings.similarMemoryRatio + availableMemoryTokens * settings.similarMemoryRatio ); let consumedSimilarMemoryTokens = 0; - if (db.hypaV3Settings.similarMemoryRatio > 0) { + if (settings.similarMemoryRatio > 0) { const selectedSimilarSummaries: Summary[] = []; // Utilize unused token space from recent selection @@ -614,7 +565,8 @@ export async function hypaMemoryV3( reservedSimilarMemoryTokens += unusedRecentTokens; console.log( - "[HypaV3] Additional available token space for similar memory:", + logPrefix, + "Additional available token space for similar memory:", "\nFrom recent:", unusedRecentTokens ); @@ -625,155 +577,182 @@ export async function hypaMemoryV3( (e) => !selectedSummaries.includes(e) ); - // Dynamically generate summary chunks - const summaryChunks: SummaryChunk[] = []; + // Dynamically generate embedding texts + const ebdTexts: EmbeddingText[] = unusedSummaries.flatMap( + (summary) => { + const splitted = summary.text + .split("\n\n") + .filter((e) => e.trim().length > 0); - unusedSummaries.forEach((summary) => { - const splitted = summary.text - .split("\n\n") - .filter((e) => e.trim().length > 0); + return splitted.map((e) => ({ + content: e.trim(), + metadata: summary, + })); + } + ); - summaryChunks.push( - ...splitted.map((e) => ({ - text: e.trim(), - summary, - })) - ); + // Initialize embedding processor + const processor = new HypaProcessorV2({ + rateLimiter: new TaskRateLimiter({ + tasksPerMinute: settings.embeddingRequestsPerMinute, + maxConcurrentTasks: settings.embeddingMaxConcurrent, + }), }); - // Fetch memory from summaryChunks - const processor = new HypaProcesserEx(db.hypaModel); - processor.oaikey = db.supaMemoryKey; + processor.progressCallback = (queuedCount) => { + hypaV3ProgressStore.set({ + open: true, + miniMsg: `${queuedCount}`, + msg: `${logPrefix} Similarity searching...`, + subMsg: `${queuedCount} queued`, + }); + }; - // Add summaryChunks to processor for similarity search try { - await processor.addSummaryChunks(summaryChunks); + // Start of performance measurement: addTexts + console.log( + `${logPrefix} Starting addTexts with ${ebdTexts.length} chunks` + ); + const addStartTime = performance.now(); + + // Add EmbeddingTexts to processor for similarity search + await processor.addTexts(ebdTexts); + + const addEndTime = performance.now(); + console.debug( + `${logPrefix} addTexts completed in ${addEndTime - addStartTime}ms` + ); + // End of performance measurement: addTexts } catch (error) { return { currentTokens, chats, - error: `[HypaV3] Similarity search failed: ${error}`, + error: `${logPrefix} Similarity search failed: ${error}`, memory: toSerializableHypaV3Data(data), }; - } - - const scoredSummaries = new Map(); - 
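    // How the rewritten search below scores candidates: every paragraph of
    // the recent chats is a query; each chunk hit adds its similarity to its
    // parent summary's total, totals are max-normalized, then summaries are
    // taken in descending score order until the reserved budget is spent.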
- // (1) Raw recent chat search - for (let i = 0; i < minChatsForSimilarity; i++) { - const pop = chats[chats.length - i - 1]; - - if (!pop) break; - - try { - const searched = await processor.similaritySearchScoredEx(pop.content); - - for (const [chunk, similarity] of searched) { - const summary = chunk.summary; - - scoredSummaries.set( - summary, - (scoredSummaries.get(summary) || 0) + similarity - ); - } - } catch (error) { - return { - currentTokens, - chats, - error: `[HypaV3] Similarity search failed: ${error}`, - memory: toSerializableHypaV3Data(data), - }; - } - } - - // (2) Summarized recent chat search - if (db.hypaV3Settings.enableSimilarityCorrection) { - // Attempt summarization - const recentChats = chats.slice(-minChatsForSimilarity); - const summarizeResult = await retryableSummarize(recentChats); - - if ( - !summarizeResult.success || - !summarizeResult.data || - summarizeResult.data.trim().length === 0 - ) { - return { - currentTokens, - chats, - error: `[HypaV3] Summarization failed after maximum retries: ${summarizeResult.data}`, - memory: toSerializableHypaV3Data(data), - }; - } - - try { - const searched = await processor.similaritySearchScoredEx( - summarizeResult.data - ); - - for (const [chunk, similarity] of searched) { - const summary = chunk.summary; - - scoredSummaries.set( - summary, - (scoredSummaries.get(summary) || 0) + similarity - ); - } - } catch (error) { - return { - currentTokens, - chats, - error: `[HypaV3] Similarity search failed: ${error}`, - memory: toSerializableHypaV3Data(data), - }; - } - - console.log("[HypaV3] Similarity corrected."); - } - - // Sort in descending order - const scoredArray = [...scoredSummaries.entries()].sort( - ([, scoreA], [, scoreB]) => scoreB - scoreA - ); - - while (scoredArray.length > 0) { - const [summary] = scoredArray.shift(); - const summaryTokens = await tokenizer.tokenizeChat({ - role: "system", - content: summary.text + summarySeparator, + } finally { + hypaV3ProgressStore.set({ + open: false, + miniMsg: "", + msg: "", + subMsg: "", }); - - /* - console.log( - "[HypaV3] Trying to add similar summary:", - "\nSummary Tokens:", - summaryTokens, - "\nConsumed Similar Memory Tokens:", - consumedSimilarMemoryTokens, - "\nReserved Tokens:", - reservedSimilarMemoryTokens, - "\nWould exceed:", - summaryTokens + consumedSimilarMemoryTokens > reservedSimilarMemoryTokens - ); - */ - - if ( - summaryTokens + consumedSimilarMemoryTokens > - reservedSimilarMemoryTokens - ) { - console.log( - `[HypaV3] Stopping similar memory selection: consumedSimilarMemoryTokens(${consumedSimilarMemoryTokens}) + summaryTokens(${summaryTokens}) > reservedSimilarMemoryTokens(${reservedSimilarMemoryTokens})` - ); - break; - } - - selectedSimilarSummaries.push(summary); - consumedSimilarMemoryTokens += summaryTokens; } - selectedSummaries.push(...selectedSimilarSummaries); + const recentChats = chats + .slice(-minChatsForSimilarity) + .filter((chat) => chat.content.trim().length > 0); + const queries: string[] = recentChats.flatMap((chat) => { + return chat.content.split("\n\n").filter((e) => e.trim().length > 0); + }); + + if (queries.length > 0) { + const scoredSummaries = new Map(); + + try { + // Start of performance measurement: similarity search + console.log( + `${logPrefix} Starting similarity search with ${recentChats.length} queries` + ); + const searchStartTime = performance.now(); + + const batchScoredResults = await processor.similaritySearchScoredBatch( + queries + ); + + const searchEndTime = performance.now(); + console.debug( + 
`${logPrefix} Similarity search completed in ${ + searchEndTime - searchStartTime + }ms` + ); + // End of performance measurement: similarity search + + for (const scoredResults of batchScoredResults) { + for (const [ebdResult, similarity] of scoredResults) { + const summary = ebdResult.metadata; + + scoredSummaries.set( + summary, + (scoredSummaries.get(summary) || 0) + similarity + ); + } + } + } catch (error) { + return { + currentTokens, + chats, + error: `${logPrefix} Similarity search failed: ${error}`, + memory: toSerializableHypaV3Data(data), + }; + } finally { + hypaV3ProgressStore.set({ + open: false, + miniMsg: "", + msg: "", + subMsg: "", + }); + } + + // Normalize scores + if (scoredSummaries.size > 0) { + const maxScore = Math.max(...scoredSummaries.values()); + + for (const [summary, score] of scoredSummaries.entries()) { + scoredSummaries.set(summary, score / maxScore); + } + } + + // Sort in descending order + const scoredArray = [...scoredSummaries.entries()].sort( + ([, scoreA], [, scoreB]) => scoreB - scoreA + ); + + while (scoredArray.length > 0) { + const [summary] = scoredArray.shift(); + const summaryTokens = await tokenizer.tokenizeChat({ + role: "system", + content: summary.text + summarySeparator, + }); + + /* + console.log( + logPrefix, + "Trying to add similar summary:", + "\nSummary Tokens:", + summaryTokens, + "\nConsumed Similar Memory Tokens:", + consumedSimilarMemoryTokens, + "\nReserved Tokens:", + reservedSimilarMemoryTokens, + "\nWould exceed:", + summaryTokens + consumedSimilarMemoryTokens > + reservedSimilarMemoryTokens + ); + */ + + if ( + summaryTokens + consumedSimilarMemoryTokens > + reservedSimilarMemoryTokens + ) { + console.log( + logPrefix, + "Stopping similar memory selection:", + `\nconsumedSimilarMemoryTokens(${consumedSimilarMemoryTokens}) + summaryTokens(${summaryTokens}) > reservedSimilarMemoryTokens(${reservedSimilarMemoryTokens})` + ); + break; + } + + selectedSimilarSummaries.push(summary); + consumedSimilarMemoryTokens += summaryTokens; + } + + selectedSummaries.push(...selectedSimilarSummaries); + } console.log( - "[HypaV3] After similar memory selection:", + logPrefix, + "After similar memory selection:", "\nSummary Count:", selectedSimilarSummaries.length, "\nSummaries:", @@ -802,7 +781,8 @@ export async function hypaMemoryV3( reservedRandomMemoryTokens += unusedRecentTokens + unusedSimilarTokens; console.log( - "[HypaV3] Additional available token space for random memory:", + logPrefix, + "Additional available token space for random memory:", "\nFrom recent:", unusedRecentTokens, "\nFrom similar:", @@ -837,7 +817,8 @@ export async function hypaMemoryV3( selectedSummaries.push(...selectedRandomSummaries); console.log( - "[HypaV3] After random memory selection:", + logPrefix, + "After random memory selection:", "\nSummary Count:", selectedRandomSummaries.length, "\nSummaries:", @@ -855,7 +836,8 @@ export async function hypaMemoryV3( ); // Generate final memory prompt - const memory = encapsulateMemoryPrompt( + const memory = wrapWithXml( + memoryPromptTag, selectedSummaries.map((e) => e.text).join(summarySeparator) ); const realMemoryTokens = await tokenizer.tokenizeChat({ @@ -864,16 +846,15 @@ export async function hypaMemoryV3( }); // Release reserved memory tokens - if (shouldReserveEmptyMemoryTokens) { - currentTokens -= emptyMemoryTokens; - } else { + if (shouldReserveMemoryTokens) { currentTokens -= memoryTokens; } currentTokens += realMemoryTokens; console.log( - "[HypaV3] Final memory selection:", + logPrefix, + "Final 
memory selection:", "\nSummary Count:", selectedSummaries.length, "\nSummaries:", @@ -886,7 +867,7 @@ export async function hypaMemoryV3( if (currentTokens > maxContextTokens) { throw new Error( - `[HypaV3] Unexpected error: input token count (${currentTokens}) exceeds max context size (${maxContextTokens})` + `Unexpected error: input token count (${currentTokens}) exceeds max context size (${maxContextTokens})` ); } @@ -905,7 +886,8 @@ export async function hypaMemoryV3( ]; console.log( - "[HypaV3] Exiting function:", + logPrefix, + "Exiting function:", "\nCurrent Tokens:", currentTokens, "\nAll chats, including memory prompt:", @@ -921,26 +903,870 @@ export async function hypaMemoryV3( }; } -type SummaryChunkVector = { +async function hypaMemoryV3Main( + chats: OpenAIChat[], + currentTokens: number, + maxContextTokens: number, + room: Chat, + char: character | groupChat, + tokenizer: ChatTokenizer +): Promise { + const db = getDatabase(); + const settings = getCurrentHypaV3Preset().settings; + + // Validate settings + if (settings.recentMemoryRatio + settings.similarMemoryRatio > 1) { + return { + currentTokens, + chats, + error: `${logPrefix} The sum of Recent Memory Ratio and Similar Memory Ratio is greater than 1.`, + }; + } + + // Initial token correction + currentTokens -= db.maxResponse; + + // Load existing hypa data if available + let data: HypaV3Data = { + summaries: [], + lastSelectedSummaries: [], + }; + + if (room.hypaV3Data) { + data = toHypaV3Data(room.hypaV3Data); + } + + // Clean orphaned summaries + if (!settings.preserveOrphanedMemory) { + cleanOrphanedSummary(chats, data); + } + + // Determine starting index + let startIdx = 0; + + if (data.summaries.length > 0) { + const lastSummary = data.summaries.at(-1); + const lastChatIndex = chats.findIndex( + (chat) => chat.memo === [...lastSummary.chatMemos].at(-1) + ); + + if (lastChatIndex !== -1) { + startIdx = lastChatIndex + 1; + + // Exclude tokens from summarized chats + const summarizedChats = chats.slice(0, lastChatIndex + 1); + for (const chat of summarizedChats) { + currentTokens -= await tokenizer.tokenizeChat(chat); + } + } + } + + console.log(logPrefix, "Starting index:", startIdx); + + // Reserve memory tokens + const emptyMemoryTokens = await tokenizer.tokenizeChat({ + role: "system", + content: wrapWithXml(memoryPromptTag, ""), + }); + const memoryTokens = Math.floor( + maxContextTokens * settings.memoryTokensRatio + ); + const shouldReserveEmptyMemoryTokens = + data.summaries.length === 0 && + currentTokens + emptyMemoryTokens <= maxContextTokens; + let availableMemoryTokens = shouldReserveEmptyMemoryTokens + ? 
0 + : memoryTokens - emptyMemoryTokens; + + if (shouldReserveEmptyMemoryTokens) { + currentTokens += emptyMemoryTokens; + console.log(logPrefix, "Reserved empty memory tokens:", emptyMemoryTokens); + } else { + currentTokens += memoryTokens; + console.log(logPrefix, "Reserved max memory tokens:", memoryTokens); + } + + // If summarization is needed + const summarizationMode = currentTokens > maxContextTokens; + const targetTokens = + maxContextTokens * (1 - settings.extraSummarizationRatio); + + while (summarizationMode) { + if (currentTokens <= targetTokens) { + break; + } + + if (chats.length - startIdx <= minChatsForSimilarity) { + if (currentTokens <= maxContextTokens) { + break; + } else { + return { + currentTokens, + chats, + error: `${logPrefix} Cannot summarize further: input token count (${currentTokens}) exceeds max context size (${maxContextTokens}), but minimum ${minChatsForSimilarity} messages required.`, + memory: toSerializableHypaV3Data(data), + }; + } + } + + const toSummarize: OpenAIChat[] = []; + const endIdx = Math.min( + startIdx + settings.maxChatsPerSummary, + chats.length - minChatsForSimilarity + ); + let toSummarizeTokens = 0; + + console.log( + logPrefix, + "Evaluating summarization batch:", + "\nCurrent Tokens:", + currentTokens, + "\nMax Context Tokens:", + maxContextTokens, + "\nStart Index:", + startIdx, + "\nEnd Index:", + endIdx, + "\nChat Count:", + endIdx - startIdx, + "\nMax Chats Per Summary:", + settings.maxChatsPerSummary + ); + + for (let i = startIdx; i < endIdx; i++) { + const chat = chats[i]; + const chatTokens = await tokenizer.tokenizeChat(chat); + + console.log( + logPrefix, + "Evaluating chat:", + "\nIndex:", + i, + "\nRole:", + chat.role, + "\nContent:", + "\n" + chat.content, + "\nTokens:", + chatTokens + ); + + toSummarizeTokens += chatTokens; + + if ( + chat.name === "example_user" || + chat.name === "example_assistant" || + chat.memo === "NewChatExample" + ) { + console.log(logPrefix, `Skipping example chat at index ${i}`); + continue; + } + + if (chat.memo === "NewChat") { + console.log(logPrefix, `Skipping new chat at index ${i}`); + continue; + } + + if (chat.content.trim().length === 0) { + console.log(logPrefix, `Skipping empty chat at index ${i}`); + continue; + } + + if (settings.doNotSummarizeUserMessage && chat.role === "user") { + console.log(logPrefix, `Skipping user role at index ${i}`); + continue; + } + + toSummarize.push(chat); + } + + // Stop summarization if further reduction would go below target tokens (unless we're over max tokens) + if ( + currentTokens <= maxContextTokens && + currentTokens - toSummarizeTokens < targetTokens + ) { + console.log( + logPrefix, + "Stopping summarization:", + `\ncurrentTokens(${currentTokens}) - toSummarizeTokens(${toSummarizeTokens}) < targetTokens(${targetTokens})` + ); + break; + } + + // Attempt summarization + if (toSummarize.length > 0) { + console.log( + logPrefix, + "Attempting summarization:", + "\nTarget:", + toSummarize + ); + + try { + const summarizeResult = await summarize(toSummarize); + + data.summaries.push({ + text: summarizeResult, + chatMemos: new Set(toSummarize.map((chat) => chat.memo)), + isImportant: false, + }); + } catch (error) { + console.log(logPrefix, "Summarization failed:", `\n${error}`); + + return { + currentTokens, + chats, + error: `${logPrefix} Summarization failed: ${error}`, + memory: toSerializableHypaV3Data(data), + }; + } + } + + currentTokens -= toSummarizeTokens; + startIdx = endIdx; + } + + console.log( + logPrefix, + `${summarizationMode ? 
"Completed" : "Skipped"} summarization phase:`, + "\nCurrent Tokens:", + currentTokens, + "\nMax Context Tokens:", + maxContextTokens, + "\nAvailable Memory Tokens:", + availableMemoryTokens + ); + + // Early return if no summaries + if (data.summaries.length === 0) { + // Generate final memory prompt + const memory = wrapWithXml(memoryPromptTag, ""); + + const newChats: OpenAIChat[] = [ + { + role: "system", + content: memory, + memo: "supaMemory", + }, + ...chats.slice(startIdx), + ]; + + console.log( + logPrefix, + "Exiting function:", + "\nCurrent Tokens:", + currentTokens, + "\nAll chats, including memory prompt:", + newChats, + "\nMemory Data:", + data + ); + + return { + currentTokens, + chats: newChats, + memory: toSerializableHypaV3Data(data), + }; + } + + const selectedSummaries: Summary[] = []; + const randomMemoryRatio = + 1 - settings.recentMemoryRatio - settings.similarMemoryRatio; + + // Select important summaries + { + const selectedImportantSummaries: Summary[] = []; + + for (const summary of data.summaries) { + if (summary.isImportant) { + const summaryTokens = await tokenizer.tokenizeChat({ + role: "system", + content: summary.text + summarySeparator, + }); + + if (summaryTokens > availableMemoryTokens) { + break; + } + + selectedImportantSummaries.push(summary); + + availableMemoryTokens -= summaryTokens; + } + } + + selectedSummaries.push(...selectedImportantSummaries); + + console.log( + logPrefix, + "After important memory selection:", + "\nSummary Count:", + selectedImportantSummaries.length, + "\nSummaries:", + selectedImportantSummaries, + "\nAvailable Memory Tokens:", + availableMemoryTokens + ); + } + + // Select recent summaries + const reservedRecentMemoryTokens = Math.floor( + availableMemoryTokens * settings.recentMemoryRatio + ); + let consumedRecentMemoryTokens = 0; + + if (settings.recentMemoryRatio > 0) { + const selectedRecentSummaries: Summary[] = []; + + // Target only summaries that haven't been selected yet + const unusedSummaries = data.summaries.filter( + (e) => !selectedSummaries.includes(e) + ); + + // Add one by one from the end + for (let i = unusedSummaries.length - 1; i >= 0; i--) { + const summary = unusedSummaries[i]; + const summaryTokens = await tokenizer.tokenizeChat({ + role: "system", + content: summary.text + summarySeparator, + }); + + if ( + summaryTokens + consumedRecentMemoryTokens > + reservedRecentMemoryTokens + ) { + break; + } + + selectedRecentSummaries.push(summary); + consumedRecentMemoryTokens += summaryTokens; + } + + selectedSummaries.push(...selectedRecentSummaries); + + console.log( + logPrefix, + "After recent memory selection:", + "\nSummary Count:", + selectedRecentSummaries.length, + "\nSummaries:", + selectedRecentSummaries, + "\nReserved Tokens:", + reservedRecentMemoryTokens, + "\nConsumed Tokens:", + consumedRecentMemoryTokens + ); + } + + // Select similar summaries + let reservedSimilarMemoryTokens = Math.floor( + availableMemoryTokens * settings.similarMemoryRatio + ); + let consumedSimilarMemoryTokens = 0; + + if (settings.similarMemoryRatio > 0) { + const selectedSimilarSummaries: Summary[] = []; + + // Utilize unused token space from recent selection + if (randomMemoryRatio <= 0) { + const unusedRecentTokens = + reservedRecentMemoryTokens - consumedRecentMemoryTokens; + + reservedSimilarMemoryTokens += unusedRecentTokens; + console.log( + logPrefix, + "Additional available token space for similar memory:", + "\nFrom recent:", + unusedRecentTokens + ); + } + + // Target only summaries that haven't been 
selected yet + const unusedSummaries = data.summaries.filter( + (e) => !selectedSummaries.includes(e) + ); + + // Dynamically generate summary chunks + const summaryChunks: SummaryChunk[] = []; + + unusedSummaries.forEach((summary) => { + const splitted = summary.text + .split("\n\n") + .filter((e) => e.trim().length > 0); + + summaryChunks.push( + ...splitted.map((e) => ({ + text: e.trim(), + summary, + })) + ); + }); + + // Initialize embedding processor + const processor = new HypaProcesserEx(db.hypaModel); + processor.oaikey = db.supaMemoryKey; + + // Add summaryChunks to processor for similarity search + try { + await processor.addSummaryChunks(summaryChunks); + } catch (error) { + return { + currentTokens, + chats, + error: `${logPrefix} Similarity search failed: ${error}`, + memory: toSerializableHypaV3Data(data), + }; + } + + const scoredSummaries = new Map(); + const recentChats = chats + .slice(-minChatsForSimilarity) + .filter((chat) => chat.content.trim().length > 0); + + if (recentChats.length > 0) { + // Raw recent chat search + const queries = recentChats.map((chat) => chat.content); + + if (settings.enableSimilarityCorrection && recentChats.length > 1) { + // Raw + Summarized recent chat search + // Summarizing is meaningful when there are more than 2 recent chats + + // Attempt summarization + console.log( + logPrefix, + "Attempting summarization for similarity search:", + "\nTarget:", + recentChats + ); + + try { + const summarizeResult = await summarize(recentChats); + + queries.push(summarizeResult); + } catch (error) { + console.log(logPrefix, "Summarization failed:", `\n${error}`); + + return { + currentTokens, + chats, + error: `${logPrefix} Summarization failed: ${error}`, + memory: toSerializableHypaV3Data(data), + }; + } + } + + try { + for (const query of queries) { + const scoredChunks = await processor.similaritySearchScoredEx(query); + + for (const [chunk, similarity] of scoredChunks) { + const summary = chunk.summary; + + scoredSummaries.set( + summary, + (scoredSummaries.get(summary) || 0) + similarity + ); + } + } + } catch (error) { + return { + currentTokens, + chats, + error: `${logPrefix} Similarity search failed: ${error}`, + memory: toSerializableHypaV3Data(data), + }; + } + } + + // Sort in descending order + const scoredArray = [...scoredSummaries.entries()].sort( + ([, scoreA], [, scoreB]) => scoreB - scoreA + ); + + while (scoredArray.length > 0) { + const [summary] = scoredArray.shift(); + const summaryTokens = await tokenizer.tokenizeChat({ + role: "system", + content: summary.text + summarySeparator, + }); + + /* + console.log( + logPrefix, + "Trying to add similar summary:", + "\nSummary Tokens:", + summaryTokens, + "\nConsumed Similar Memory Tokens:", + consumedSimilarMemoryTokens, + "\nReserved Tokens:", + reservedSimilarMemoryTokens, + "\nWould exceed:", + summaryTokens + consumedSimilarMemoryTokens > + reservedSimilarMemoryTokens + ); + */ + + if ( + summaryTokens + consumedSimilarMemoryTokens > + reservedSimilarMemoryTokens + ) { + console.log( + logPrefix, + "Stopping similar memory selection:", + `\nconsumedSimilarMemoryTokens(${consumedSimilarMemoryTokens}) + summaryTokens(${summaryTokens}) > reservedSimilarMemoryTokens(${reservedSimilarMemoryTokens})` + ); + break; + } + + selectedSimilarSummaries.push(summary); + consumedSimilarMemoryTokens += summaryTokens; + } + + selectedSummaries.push(...selectedSimilarSummaries); + + console.log( + logPrefix, + "After similar memory selection:", + "\nSummary Count:", + 
selectedSimilarSummaries.length, + "\nSummaries:", + selectedSimilarSummaries, + "\nReserved Tokens:", + reservedSimilarMemoryTokens, + "\nConsumed Tokens:", + consumedSimilarMemoryTokens + ); + } + + // Select random summaries + let reservedRandomMemoryTokens = Math.floor( + availableMemoryTokens * randomMemoryRatio + ); + let consumedRandomMemoryTokens = 0; + + if (randomMemoryRatio > 0) { + const selectedRandomSummaries: Summary[] = []; + + // Utilize unused token space from recent and similar selection + const unusedRecentTokens = + reservedRecentMemoryTokens - consumedRecentMemoryTokens; + const unusedSimilarTokens = + reservedSimilarMemoryTokens - consumedSimilarMemoryTokens; + + reservedRandomMemoryTokens += unusedRecentTokens + unusedSimilarTokens; + console.log( + logPrefix, + "Additional available token space for random memory:", + "\nFrom recent:", + unusedRecentTokens, + "\nFrom similar:", + unusedSimilarTokens, + "\nTotal added:", + unusedRecentTokens + unusedSimilarTokens + ); + + // Target only summaries that haven't been selected yet + const unusedSummaries = data.summaries + .filter((e) => !selectedSummaries.includes(e)) + .sort(() => Math.random() - 0.5); // Random shuffle + + for (const summary of unusedSummaries) { + const summaryTokens = await tokenizer.tokenizeChat({ + role: "system", + content: summary.text + summarySeparator, + }); + + if ( + summaryTokens + consumedRandomMemoryTokens > + reservedRandomMemoryTokens + ) { + // Trying to select more random memory + continue; + } + + selectedRandomSummaries.push(summary); + consumedRandomMemoryTokens += summaryTokens; + } + + selectedSummaries.push(...selectedRandomSummaries); + + console.log( + logPrefix, + "After random memory selection:", + "\nSummary Count:", + selectedRandomSummaries.length, + "\nSummaries:", + selectedRandomSummaries, + "\nReserved Tokens:", + reservedRandomMemoryTokens, + "\nConsumed Tokens:", + consumedRandomMemoryTokens + ); + } + + // Sort selected summaries chronologically (by index) + selectedSummaries.sort( + (a, b) => data.summaries.indexOf(a) - data.summaries.indexOf(b) + ); + + // Generate final memory prompt + const memory = wrapWithXml( + memoryPromptTag, + selectedSummaries.map((e) => e.text).join(summarySeparator) + ); + const realMemoryTokens = await tokenizer.tokenizeChat({ + role: "system", + content: memory, + }); + + // Release reserved memory tokens + if (shouldReserveEmptyMemoryTokens) { + currentTokens -= emptyMemoryTokens; + } else { + currentTokens -= memoryTokens; + } + + currentTokens += realMemoryTokens; + + console.log( + logPrefix, + "Final memory selection:", + "\nSummary Count:", + selectedSummaries.length, + "\nSummaries:", + selectedSummaries, + "\nReal Memory Tokens:", + realMemoryTokens, + "\nAvailable Memory Tokens:", + availableMemoryTokens + ); + + if (currentTokens > maxContextTokens) { + throw new Error( + `Unexpected error: input token count (${currentTokens}) exceeds max context size (${maxContextTokens})` + ); + } + + // Save last selected summaries + data.lastSelectedSummaries = selectedSummaries.map((selectedSummary) => + data.summaries.findIndex((summary) => summary === selectedSummary) + ); + + const newChats: OpenAIChat[] = [ + { + role: "system", + content: memory, + memo: "supaMemory", + }, + ...chats.slice(startIdx), + ]; + + console.log( + logPrefix, + "Exiting function:", + "\nCurrent Tokens:", + currentTokens, + "\nAll chats, including memory prompt:", + newChats, + "\nMemory Data:", + data + ); + + return { + currentTokens, + chats: newChats, 
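+    // Updated summaries in JSON-serializable form (Sets converted to arrays)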
+ memory: toSerializableHypaV3Data(data), + }; +} + +function toHypaV3Data(serialData: SerializableHypaV3Data): HypaV3Data { + return { + ...serialData, + summaries: serialData.summaries.map((summary) => ({ + ...summary, + // Convert null back to undefined (JSON serialization converts undefined to null) + chatMemos: new Set( + summary.chatMemos.map((memo) => (memo === null ? undefined : memo)) + ), + })), + }; +} + +function toSerializableHypaV3Data(data: HypaV3Data): SerializableHypaV3Data { + return { + ...data, + summaries: data.summaries.map((summary) => ({ + ...summary, + chatMemos: [...summary.chatMemos], + })), + }; +} + +function cleanOrphanedSummary(chats: OpenAIChat[], data: HypaV3Data): void { + // Collect all memos from current chats + const currentChatMemos = new Set(chats.map((chat) => chat.memo)); + const originalLength = data.summaries.length; + + // Filter summaries - keep only those whose chatMemos are subset of current chat memos + data.summaries = data.summaries.filter((summary) => { + return isSubset(summary.chatMemos, currentChatMemos); + }); + + const removedCount = originalLength - data.summaries.length; + + if (removedCount > 0) { + console.log(logPrefix, `Cleaned ${removedCount} orphaned summaries.`); + } +} + +function isSubset(subset: Set, superset: Set): boolean { + for (const elem of subset) { + if (!superset.has(elem)) { + return false; + } + } + + return true; +} + +function wrapWithXml(tag: string, content: string): string { + return `<${tag}>\n${content}\n`; +} + +export async function summarize(oaiMessages: OpenAIChat[]): Promise { + const db = getDatabase(); + const settings = getCurrentHypaV3Preset().settings; + + const strMessages = oaiMessages + .map((chat) => `${chat.role}: ${chat.content}`) + .join("\n"); + + const summarizationPrompt = + settings.summarizationPrompt.trim() === "" + ? "[Summarize the ongoing role story, It must also remove redundancy and unnecessary text and content from the output.]" + : settings.summarizationPrompt; + + const formated: OpenAIChat[] = parseChatML( + summarizationPrompt.replaceAll("{{slot}}", strMessages) + ) ?? [ + { + role: "user", + content: strMessages, + }, + { + role: "system", + content: summarizationPrompt, + }, + ]; + + // API + if (settings.summarizationModel === "subModel") { + console.log(logPrefix, `Using ax model ${db.subModel} for summarization.`); + + const response = await requestChatData( + { + formated, + bias: {}, + useStreaming: false, + noMultiGen: true, + }, + "memory" + ); + + if (response.type === "streaming" || response.type === "multiline") { + throw new Error("Unexpected response type"); + } + + if (response.type === "fail") { + throw new Error(response.result); + } + + if (!response.result || response.result.trim().length === 0) { + throw new Error("Empty summary returned"); + } + + return response.result.trim(); + } + + // Local + const content = await chatCompletion(formated, settings.summarizationModel, { + max_tokens: 8192, + temperature: 0, + extra_body: { + enable_thinking: false, + }, + }); + + if (!content || content.trim().length === 0) { + throw new Error("Empty summary returned"); + } + + // Remove think content + const thinkRegex = /[\s\S]*?<\/think>/g; + + return content.replace(thinkRegex, "").trim(); +} + +export function getCurrentHypaV3Preset(): HypaV3Preset { + const db = getDatabase(); + const preset = db.hypaV3Presets?.[db.hypaV3PresetId]; + + if (!preset) { + throw new Error("Preset not found. 
Please select a valid preset."); + } + + return preset; +} + +export function createHypaV3Preset( + name = "New Preset", + existingSettings = {} +): HypaV3Preset { + const settings: HypaV3Settings = { + summarizationModel: "subModel", + summarizationPrompt: "", + memoryTokensRatio: 0.2, + extraSummarizationRatio: 0, + maxChatsPerSummary: 6, + recentMemoryRatio: 0.4, + similarMemoryRatio: 0.4, + enableSimilarityCorrection: false, + preserveOrphanedMemory: false, + processRegexScript: false, + doNotSummarizeUserMessage: false, + // Experimental + useExperimentalImpl: false, + summarizationRequestsPerMinute: 20, + summarizationMaxConcurrent: 1, + embeddingRequestsPerMinute: 100, + embeddingMaxConcurrent: 1, + }; + + if ( + existingSettings && + typeof existingSettings === "object" && + !Array.isArray(existingSettings) + ) { + for (const [key, value] of Object.entries(existingSettings)) { + if (key in settings && typeof value === typeof settings[key]) { + settings[key] = value; + } + } + } + + return { + name, + settings, + }; +} + +interface SummaryChunkVector { chunk: SummaryChunk; vector: memoryVector; -}; +} class HypaProcesserEx extends HypaProcesser { // Maintain references to SummaryChunks and their associated memoryVectors summaryChunkVectors: SummaryChunkVector[] = []; - // Calculate dot product similarity between two vectors - similarity(a: VectorArray, b: VectorArray): number { - let dot = 0; - - for (let i = 0; i < a.length; i++) { - dot += a[i] * b[i]; - } - - return dot; - } - async addSummaryChunks(chunks: SummaryChunk[]): Promise { // Maintain the superclass's caching structure by adding texts const texts = chunks.map((chunk) => chunk.text); @@ -977,7 +1803,7 @@ class HypaProcesserEx extends HypaProcesser { return this.summaryChunkVectors .map((scv) => ({ chunk: scv.chunk, - similarity: this.similarity(queryVector, scv.vector.embedding), + similarity: similarity(queryVector, scv.vector.embedding), })) .sort((a, b) => (a.similarity > b.similarity ? 
-1 : 0)) .map((result) => [result.chunk, result.similarity]); diff --git a/src/ts/process/memory/taskRateLimiter.ts b/src/ts/process/memory/taskRateLimiter.ts new file mode 100644 index 00000000..0d6cfae3 --- /dev/null +++ b/src/ts/process/memory/taskRateLimiter.ts @@ -0,0 +1,188 @@ +export interface TaskRateLimiterOptions { + tasksPerMinute?: number; + maxConcurrentTasks?: number; + failFast?: boolean; +} + +export interface BatchResult { + results: TaskResult[]; + successCount: number; + failureCount: number; + allSucceeded: boolean; +} + +export interface TaskResult { + success: boolean; + data?: TData; + error?: Error; +} + +export class TaskRateLimiter { + private static readonly LOG_PREFIX = "[TaskRateLimiter]"; + public readonly options: TaskRateLimiterOptions; + public taskQueueChangeCallback: (queuedCount: number) => void = null; + private timestamps: number[] = []; + private active: number = 0; + private queue: Array<{ + task: () => Promise>; + resolve: (result: TaskResult) => void; + }> = []; + + public constructor(options?: TaskRateLimiterOptions) { + this.options = { + tasksPerMinute: 20, + maxConcurrentTasks: 5, + failFast: true, + ...options, + }; + + if (this.options.maxConcurrentTasks > this.options.tasksPerMinute) { + throw new Error("maxConcurrentTasks must be less than tasksPerMinute"); + } + } + + public async executeTask( + task: () => Promise + ): Promise> { + return new Promise>((resolve) => { + this.queue.push({ + task: async () => { + try { + const data = await task(); + return { success: true, data }; + } catch (error) { + return { success: false, error }; + } + }, + resolve, + }); + + this.taskQueueChangeCallback?.(this.queue.length); + this.processNextFromQueue(); + }); + } + + public async executeBatch( + tasks: Array<() => Promise> + ): Promise> { + const taskResults = await Promise.all( + tasks.map((task) => this.executeTask(task)) + ); + const successCount = taskResults.filter((r) => r.success).length; + const failureCount = taskResults.length - successCount; + + return { + results: taskResults, + successCount, + failureCount, + allSucceeded: failureCount === 0, + }; + } + + public cancelPendingTasks(reason: string): void { + const error = new TaskCanceledError(reason); + + while (this.queue.length > 0) { + const { resolve } = this.queue.shift(); + resolve({ success: false, error }); + } + + this.taskQueueChangeCallback?.(this.queue.length); + } + + public get queuedTaskCount(): number { + return this.queue.length; + } + + private processNextFromQueue(): void { + if (this.queue.length === 0) return; + + if (this.active >= this.options.maxConcurrentTasks) { + // Debug log for concurrency limit hit + console.debug( + TaskRateLimiter.LOG_PREFIX, + "Concurrency limit hit:", + "\nTasks in last minute:", + this.timestamps.length + "/" + this.options.tasksPerMinute, + "\nActive tasks:", + this.active + "/" + this.options.maxConcurrentTasks, + "\nWaiting tasks in queue:", + this.queue.length + ); + + return; + } + + this.timestamps = this.timestamps.filter( + (ts) => Date.now() - ts <= 60 * 1000 + ); + + if (this.timestamps.length >= this.options.tasksPerMinute) { + const oldestTimestamp = Math.min(...this.timestamps); + const timeUntilExpiry = Math.max( + 100, + 60 * 1000 - (Date.now() - oldestTimestamp) + ); + + // Debug log for rate limit hit + console.debug( + TaskRateLimiter.LOG_PREFIX, + "Rate limit hit:", + "\nTasks in last minute:", + this.timestamps.length + "/" + this.options.tasksPerMinute, + "\nActive tasks:", + this.active + "/" + 
this.options.maxConcurrentTasks, + "\nWaiting tasks in queue:", + this.queue.length, + "\nWill retry in:", + timeUntilExpiry + "ms" + ); + + // Wait until rate limit window advances before retrying + setTimeout(() => this.processNextFromQueue(), timeUntilExpiry); + return; + } + + const { task, resolve } = this.queue.shift(); + + this.active++; + this.taskQueueChangeCallback?.(this.queue.length); + this.timestamps.push(Date.now()); + + // Debug log for task start + console.debug( + TaskRateLimiter.LOG_PREFIX, + "Task started:", + "\nTasks in last minute:", + this.timestamps.length + "/" + this.options.tasksPerMinute, + "\nActive tasks:", + this.active + "/" + this.options.maxConcurrentTasks, + "\nWaiting tasks in queue:", + this.queue.length + ); + + task() + .then((result) => { + resolve(result); + + if (!result.success && this.options.failFast) { + this.cancelPendingTasks("Task canceled due to previous failure"); + } + }) + .finally(() => { + this.active--; + + // Prevents call stack overflow while maintaining concurrency limits + queueMicrotask(() => this.processNextFromQueue()); + }); + } +} + +export class TaskCanceledError extends Error { + public readonly name: string; + + public constructor(message: string) { + super(message); + this.name = "TaskCanceledError"; + } +} diff --git a/src/ts/process/transformers.ts b/src/ts/process/transformers.ts index 4e238fa2..ea3a32d6 100644 --- a/src/ts/process/transformers.ts +++ b/src/ts/process/transformers.ts @@ -60,12 +60,19 @@ export const runEmbedding = async (texts: string[], model:EmbeddingModel = 'Xeno console.log('running embedding') let embeddingModelQuery = model + device if(!extractor || embeddingModelQuery !== lastEmbeddingModelQuery){ + // Dispose old extractor + if(extractor) { + await extractor.dispose() + } extractor = await pipeline('feature-extraction', model, { + // Default dtype for webgpu is fp32, so we can use q8, which is the default dtype in wasm. + ...(device === 'webgpu' ? 
{ dtype: "q8" } : {}), device: device, progress_callback: (progress) => { console.log(progress) } }); + lastEmbeddingModelQuery = embeddingModelQuery console.log('extractor loaded') } let result = await extractor(texts, { pooling: 'mean', normalize: true }); diff --git a/src/ts/process/webllm.ts b/src/ts/process/webllm.ts new file mode 100644 index 00000000..cb1c1485 --- /dev/null +++ b/src/ts/process/webllm.ts @@ -0,0 +1,60 @@ +import { + type ChatCompletionMessageParam, + type ChatCompletionRequestNonStreaming, + MLCEngine, + CreateMLCEngine, +} from "@mlc-ai/web-llm"; + +let engine: MLCEngine = null; +let lastModel: string = null; + +export async function chatCompletion( + messages: { role: string; content: string }[], + model: string, + config: Record +): Promise { + try { + if (!engine || lastModel !== model) { + if (engine) engine.unload(); + + const initProgressCallback = (progress) => { + console.log("[WebLLM]", progress); + }; + + engine = await CreateMLCEngine( + model, + { + initProgressCallback, + }, + { context_window_size: 16384 } + ); + + lastModel = model; + } + + const request: ChatCompletionRequestNonStreaming = { + messages: messages as ChatCompletionMessageParam[], + temperature: 0, + max_tokens: 4096, + ...config, + }; + const completion = await engine.chat.completions.create(request); + const content = completion.choices[0].message.content; + + return content; + } catch (error) { + if (error instanceof Error) { + throw error; + } + + throw new Error(JSON.stringify(error)); + } +} + +export async function unloadEngine(): Promise { + if (!engine) return; + + await engine.unload(); + engine = null; + lastModel = null; +} diff --git a/src/ts/storage/database.svelte.ts b/src/ts/storage/database.svelte.ts index 7a5fb478..60c9bf93 100644 --- a/src/ts/storage/database.svelte.ts +++ b/src/ts/storage/database.svelte.ts @@ -11,6 +11,7 @@ import { prebuiltNAIpresets, prebuiltPresets } from '../process/templates/templa import { defaultColorScheme, type ColorScheme } from '../gui/colorscheme'; import type { PromptItem, PromptSettings } from '../process/prompt'; import type { OobaChatCompletionRequestParams } from '../model/ooba'; +import { type HypaV3Settings, type HypaV3Preset, createHypaV3Preset } from '../process/memory/hypav3' export let appVer = "159.0.0" export let webAppSubVer = '' @@ -515,17 +516,21 @@ export function setDatabase(data:Database){ data.checkCorruption ??= true data.OaiCompAPIKeys ??= {} data.reasoningEffort ??= 0 - data.hypaV3Settings = { - memoryTokensRatio: data.hypaV3Settings?.memoryTokensRatio ?? 0.2, - extraSummarizationRatio: data.hypaV3Settings?.extraSummarizationRatio ?? 0, - maxChatsPerSummary: data.hypaV3Settings?.maxChatsPerSummary ?? 4, - recentMemoryRatio: data.hypaV3Settings?.recentMemoryRatio ?? 0.4, - similarMemoryRatio: data.hypaV3Settings?.similarMemoryRatio ?? 0.4, - enableSimilarityCorrection: data.hypaV3Settings?.enableSimilarityCorrection ?? false, - preserveOrphanedMemory: data.hypaV3Settings?.preserveOrphanedMemory ?? false, - processRegexScript: data.hypaV3Settings?.processRegexScript ?? false, - doNotSummarizeUserMessage: data.hypaV3Settings?.doNotSummarizeUserMessage ?? false + data.hypaV3Presets ??= [ + createHypaV3Preset("Default", { + summarizationPrompt: data.supaMemoryPrompt ? 
data.supaMemoryPrompt : "", + ...data.hypaV3Settings + }) + ] + if (data.hypaV3Presets.length > 0) { + data.hypaV3Presets = data.hypaV3Presets.map((preset, i) => + createHypaV3Preset( + preset.name || `Preset ${i + 1}`, + preset.settings || {} + ) + ) } + data.hypaV3PresetId ??= 0 data.returnCSSError ??= true data.useExperimentalGoogleTranslator ??= false if(data.antiClaudeOverload){ //migration @@ -535,7 +540,7 @@ export function setDatabase(data:Database){ data.hypaCustomSettings = { url: data.hypaCustomSettings?.url ?? "", key: data.hypaCustomSettings?.key ?? "", - model: data.hypaCustomSettings?.model ?? "", + model: data.hypaCustomSettings?.model ?? "" } data.doNotChangeSeperateModels ??= false data.modelTools ??= [] @@ -960,17 +965,10 @@ export interface Database{ showPromptComparison:boolean checkCorruption:boolean hypaV3:boolean - hypaV3Settings: { - memoryTokensRatio: number - extraSummarizationRatio: number - maxChatsPerSummary: number - recentMemoryRatio: number - similarMemoryRatio: number - enableSimilarityCorrection: boolean - preserveOrphanedMemory: boolean - processRegexScript: boolean - doNotSummarizeUserMessage: boolean - } + hypaV3Settings: HypaV3Settings // legacy + hypaV3Presets: HypaV3Preset[] + hypaV3PresetId: number + showMenuHypaMemoryModal:boolean OaiCompAPIKeys: {[key:string]:string} inlayErrorResponse:boolean reasoningEffort:number @@ -1025,7 +1023,6 @@ export interface Database{ }[] igpPrompt:string useTokenizerCaching:boolean - showMenuHypaMemoryModal:boolean } interface SeparateParameters{ diff --git a/src/ts/stores.svelte.ts b/src/ts/stores.svelte.ts index ba714e6c..316ca189 100644 --- a/src/ts/stores.svelte.ts +++ b/src/ts/stores.svelte.ts @@ -50,6 +50,13 @@ export const alertStore = writable({ type: 'none', msg: 'n', } as alertData) +export const hypaV3ModalOpen = writable(false) +export const hypaV3ProgressStore = writable({ + open: false, + miniMsg: '', + msg: '', + subMsg: '', +}) export const selIdState = $state({ selId: -1 })