import { type memoryVector, HypaProcesser, similarity } from "./hypamemory";
import { TaskRateLimiter } from "./taskRateLimiter";
import { type EmbeddingText, HypaProcessorV2 } from "./hypamemoryv2";
import {
  type Chat,
  type character,
  type groupChat,
  getDatabase,
} from "src/ts/storage/database.svelte";
import { type OpenAIChat } from "../index.svelte";
import { requestChatData } from "../request";
import { chatCompletion, unloadEngine } from "../webllm";
import { parseChatML } from "src/ts/parser.svelte";
import { hypaV3ProgressStore } from "src/ts/stores.svelte";
import { type ChatTokenizer } from "src/ts/tokenizer";

export interface HypaV3Preset {
  name: string;
  settings: HypaV3Settings;
}

export interface HypaV3Settings {
  summarizationModel: string;
  summarizationPrompt: string;
  memoryTokensRatio: number;
  extraSummarizationRatio: number;
  maxChatsPerSummary: number;
  recentMemoryRatio: number;
  similarMemoryRatio: number;
  enableSimilarityCorrection: boolean;
  preserveOrphanedMemory: boolean;
  processRegexScript: boolean;
  doNotSummarizeUserMessage: boolean;
  // Experimental
  useExperimentalImpl: boolean;
  summarizationRequestsPerMinute: number;
  summarizationMaxConcurrent: number;
  embeddingRequestsPerMinute: number;
  embeddingMaxConcurrent: number;
}

interface HypaV3Data {
  summaries: Summary[];
  lastSelectedSummaries?: number[];
}

export interface SerializableHypaV3Data {
  summaries: {
    text: string;
    chatMemos: string[];
    isImportant: boolean;
  }[];
  lastSelectedSummaries?: number[];
}

interface Summary {
  text: string;
  chatMemos: Set<string>;
  isImportant: boolean;
}

interface SummaryChunk {
  text: string;
  summary: Summary;
}

export interface HypaV3Result {
  currentTokens: number;
  chats: OpenAIChat[];
  error?: string;
  memory?: SerializableHypaV3Data;
}

const logPrefix = "[HypaV3]";
const memoryPromptTag = "Past Events Summary";
const minChatsForSimilarity = 3;
const summarySeparator = "\n\n";

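/**
 * Entry point for HypaV3 memory processing.
 *
 * Dispatches to the experimental or the stable implementation depending on
 * the current preset, prefixes any thrown error with the module log prefix,
 * and finally unloads the local (WebLLM) engine when a summarization model
 * other than "subModel" was selected.
 */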
export async function hypaMemoryV3(
  chats: OpenAIChat[],
  currentTokens: number,
  maxContextTokens: number,
  room: Chat,
  char: character | groupChat,
  tokenizer: ChatTokenizer
): Promise<HypaV3Result> {
  const settings = getCurrentHypaV3Preset().settings;

  try {
    if (settings.useExperimentalImpl) {
      console.log(logPrefix, "Using experimental implementation.");

      return await hypaMemoryV3MainExp(
        chats,
        currentTokens,
        maxContextTokens,
        room,
        char,
        tokenizer
      );
    }

    return await hypaMemoryV3Main(
      chats,
      currentTokens,
      maxContextTokens,
      room,
      char,
      tokenizer
    );
  } catch (error) {
    if (error instanceof Error) {
      // Standard Error instance
      error.message = `${logPrefix} ${error.message}`;
      throw error;
    }

    // Fallback for non-Error objects
    let errorMessage: string;

    try {
      errorMessage = JSON.stringify(error);
    } catch {
      errorMessage = String(error);
    }

    throw new Error(`${logPrefix} ${errorMessage}`);
  } finally {
    if (settings.summarizationModel !== "subModel") {
      try {
        unloadEngine();
      } catch {}
    }
  }
}

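/**
 * Experimental implementation: collects every pending summarization batch
 * first, then executes them through a TaskRateLimiter (rate-limited for the
 * "subModel" API, strictly sequential for local models) before selecting
 * important, recent, similar, and random summaries into the memory prompt.
 */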
async function hypaMemoryV3MainExp(
  chats: OpenAIChat[],
  currentTokens: number,
  maxContextTokens: number,
  room: Chat,
  char: character | groupChat,
  tokenizer: ChatTokenizer
): Promise<HypaV3Result> {
  const db = getDatabase();
  const settings = getCurrentHypaV3Preset().settings;

  // Validate settings
  if (settings.recentMemoryRatio + settings.similarMemoryRatio > 1) {
    return {
      currentTokens,
      chats,
      error: `${logPrefix} The sum of Recent Memory Ratio and Similar Memory Ratio is greater than 1.`,
    };
  }

  // Initial token correction
  currentTokens -= db.maxResponse;

  // Load existing hypa data if available
  let data: HypaV3Data = {
    summaries: [],
    lastSelectedSummaries: [],
  };

  if (room.hypaV3Data) {
    data = toHypaV3Data(room.hypaV3Data);
  }

  // Clean orphaned summaries
  if (!settings.preserveOrphanedMemory) {
    cleanOrphanedSummary(chats, data);
  }

  // Determine starting index
  let startIdx = 0;

  if (data.summaries.length > 0) {
    const lastSummary = data.summaries.at(-1);
    const lastChatIndex = chats.findIndex(
      (chat) => chat.memo === [...lastSummary.chatMemos].at(-1)
    );

    if (lastChatIndex !== -1) {
      startIdx = lastChatIndex + 1;

      // Exclude tokens from summarized chats
      const summarizedChats = chats.slice(0, lastChatIndex + 1);
      for (const chat of summarizedChats) {
        currentTokens -= await tokenizer.tokenizeChat(chat);
      }
    }
  }

  console.log(logPrefix, "Starting index:", startIdx);

  // Reserve memory tokens
  const emptyMemoryTokens = await tokenizer.tokenizeChat({
    role: "system",
    content: wrapWithXml(memoryPromptTag, ""),
  });
  const memoryTokens = Math.floor(
    maxContextTokens * settings.memoryTokensRatio
  );
  const shouldReserveMemoryTokens =
    data.summaries.length > 0 || currentTokens > maxContextTokens;
  let availableMemoryTokens = shouldReserveMemoryTokens
    ? memoryTokens - emptyMemoryTokens
    : 0;

  if (shouldReserveMemoryTokens) {
    currentTokens += memoryTokens;
    console.log(logPrefix, "Reserved memory tokens:", memoryTokens);
  }

  // If summarization is needed
  const summarizationMode = currentTokens > maxContextTokens;
  const targetTokens =
    maxContextTokens * (1 - settings.extraSummarizationRatio);
  const toSummarizeArray: OpenAIChat[][] = [];

  while (summarizationMode) {
    if (currentTokens <= targetTokens) {
      break;
    }

    if (chats.length - startIdx <= minChatsForSimilarity) {
      if (currentTokens <= maxContextTokens) {
        break;
      } else {
        return {
          currentTokens,
          chats,
          error: `${logPrefix} Cannot summarize further: input token count (${currentTokens}) exceeds max context size (${maxContextTokens}), but minimum ${minChatsForSimilarity} messages required.`,
          memory: toSerializableHypaV3Data(data),
        };
      }
    }

    const toSummarize: OpenAIChat[] = [];
    let toSummarizeTokens = 0;
    let currentIndex = startIdx;

    console.log(
      logPrefix,
      "Evaluating summarization batch:",
      "\nCurrent Tokens:",
      currentTokens,
      "\nMax Context Tokens:",
      maxContextTokens,
      "\nStart Index:",
      startIdx,
      "\nMax Chats Per Summary:",
      settings.maxChatsPerSummary
    );

    while (
      toSummarize.length < settings.maxChatsPerSummary &&
      currentIndex < chats.length - minChatsForSimilarity
    ) {
      const chat = chats[currentIndex];
      const chatTokens = await tokenizer.tokenizeChat(chat);

      console.log(
        logPrefix,
        "Evaluating chat:",
        "\nIndex:",
        currentIndex,
        "\nRole:",
        chat.role,
        "\nContent:",
        "\n" + chat.content,
        "\nTokens:",
        chatTokens
      );

      toSummarizeTokens += chatTokens;

      let shouldSummarize = true;

      if (
        chat.name === "example_user" ||
        chat.name === "example_assistant" ||
        chat.memo === "NewChatExample"
      ) {
        console.log(
          logPrefix,
          `Skipping example chat at index ${currentIndex}`
        );
        shouldSummarize = false;
      }

      if (chat.memo === "NewChat") {
        console.log(logPrefix, `Skipping new chat at index ${currentIndex}`);
        shouldSummarize = false;
      }

      if (chat.content.trim().length === 0) {
        console.log(logPrefix, `Skipping empty chat at index ${currentIndex}`);
        shouldSummarize = false;
      }

      if (settings.doNotSummarizeUserMessage && chat.role === "user") {
        console.log(logPrefix, `Skipping user role at index ${currentIndex}`);
        shouldSummarize = false;
      }

      if (shouldSummarize) {
        toSummarize.push(chat);
      }

      currentIndex++;
    }

    // Stop summarization if further reduction would go below target tokens (unless we're over max tokens)
    if (
      currentTokens <= maxContextTokens &&
      currentTokens - toSummarizeTokens < targetTokens
    ) {
      console.log(
        logPrefix,
        "Stopping summarization:",
        `\ncurrentTokens(${currentTokens}) - toSummarizeTokens(${toSummarizeTokens}) < targetTokens(${targetTokens})`
      );
      break;
    }

    // Collect summarization batch
    if (toSummarize.length > 0) {
      console.log(
        logPrefix,
        "Collecting summarization batch:",
        "\nTarget:",
        toSummarize
      );

      toSummarizeArray.push([...toSummarize]);
    }

    currentTokens -= toSummarizeTokens;
    startIdx = currentIndex;
  }

  // Process all collected summarization tasks
  if (toSummarizeArray.length > 0) {
    // Initialize rate limiter
    // A local model must be processed sequentially
    const rateLimiter = new TaskRateLimiter({
      tasksPerMinute:
        settings.summarizationModel === "subModel"
          ? settings.summarizationRequestsPerMinute
          : 1000,
      maxConcurrentTasks:
        settings.summarizationModel === "subModel"
          ? settings.summarizationMaxConcurrent
          : 1,
    });

    rateLimiter.taskQueueChangeCallback = (queuedCount) => {
      hypaV3ProgressStore.set({
        open: true,
        miniMsg: `${rateLimiter.queuedTaskCount}`,
        msg: `${logPrefix} Summarizing...`,
        subMsg: `${rateLimiter.queuedTaskCount} queued`,
      });
    };

    const summarizationTasks = toSummarizeArray.map(
      (item) => () => summarize(item)
    );

    // Start of performance measurement: summarize
    console.log(
      logPrefix,
      `Starting ${toSummarizeArray.length} summarization task(s).`
    );
    const summarizeStartTime = performance.now();

    const batchResult = await rateLimiter.executeBatch<string>(
      summarizationTasks
    );

    const summarizeEndTime = performance.now();
    console.debug(
      `${logPrefix} summarization completed in ${
        summarizeEndTime - summarizeStartTime
      }ms`
    );
    // End of performance measurement: summarize

    hypaV3ProgressStore.set({
      open: false,
      miniMsg: "",
      msg: "",
      subMsg: "",
    });

    // Note:
    // We can't temporarily save partially successful summaries to the DB,
    // because we don't know the actual summarization model name;
    // the user may change the summarization model at any time.
    for (let i = 0; i < batchResult.results.length; i++) {
      const result = batchResult.results[i];

      // Save only consecutive successes from the start; stop at the first failure
      if (!result.success || !result.data) {
        const errorMessage = !result.success
          ? result.error
          : "Empty summary returned";

        console.log(logPrefix, "Summarization failed:", `\n${errorMessage}`);

        return {
          currentTokens,
          chats,
          error: `${logPrefix} Summarization failed: ${errorMessage}`,
          memory: toSerializableHypaV3Data(data),
        };
      }

      const summaryText = result.data;

      data.summaries.push({
        text: summaryText,
        chatMemos: new Set(toSummarizeArray[i].map((chat) => chat.memo)),
        isImportant: false,
      });
    }
  }

  console.log(
    logPrefix,
    `${summarizationMode ? "Completed" : "Skipped"} summarization phase:`,
    "\nCurrent Tokens:",
    currentTokens,
    "\nMax Context Tokens:",
    maxContextTokens,
    "\nAvailable Memory Tokens:",
    availableMemoryTokens
  );

  // Early return if no summaries
  if (data.summaries.length === 0) {
    const newChats: OpenAIChat[] = chats.slice(startIdx);

    console.log(
      logPrefix,
      "Exiting function:",
      "\nCurrent Tokens:",
      currentTokens,
      "\nAll chats, including memory prompt:",
      newChats,
      "\nMemory Data:",
      data
    );

    return {
      currentTokens,
      chats: newChats,
      memory: toSerializableHypaV3Data(data),
    };
  }

  const selectedSummaries: Summary[] = [];
  const randomMemoryRatio =
    1 - settings.recentMemoryRatio - settings.similarMemoryRatio;

  // Select important summaries
  {
    const selectedImportantSummaries: Summary[] = [];

    for (const summary of data.summaries) {
      if (summary.isImportant) {
        const summaryTokens = await tokenizer.tokenizeChat({
          role: "system",
          content: summary.text + summarySeparator,
        });

        if (summaryTokens > availableMemoryTokens) {
          break;
        }

        selectedImportantSummaries.push(summary);

        availableMemoryTokens -= summaryTokens;
      }
    }

    selectedSummaries.push(...selectedImportantSummaries);

    console.log(
      logPrefix,
      "After important memory selection:",
      "\nSummary Count:",
      selectedImportantSummaries.length,
      "\nSummaries:",
      selectedImportantSummaries,
      "\nAvailable Memory Tokens:",
      availableMemoryTokens
    );
  }

  // Select recent summaries
  const reservedRecentMemoryTokens = Math.floor(
    availableMemoryTokens * settings.recentMemoryRatio
  );
  let consumedRecentMemoryTokens = 0;

  if (settings.recentMemoryRatio > 0) {
    const selectedRecentSummaries: Summary[] = [];

    // Target only summaries that haven't been selected yet
    const unusedSummaries = data.summaries.filter(
      (e) => !selectedSummaries.includes(e)
    );

    // Add one by one from the end
    for (let i = unusedSummaries.length - 1; i >= 0; i--) {
      const summary = unusedSummaries[i];
      const summaryTokens = await tokenizer.tokenizeChat({
        role: "system",
        content: summary.text + summarySeparator,
      });

      if (
        summaryTokens + consumedRecentMemoryTokens >
        reservedRecentMemoryTokens
      ) {
        break;
      }

      selectedRecentSummaries.push(summary);
      consumedRecentMemoryTokens += summaryTokens;
    }

    selectedSummaries.push(...selectedRecentSummaries);

    console.log(
      logPrefix,
      "After recent memory selection:",
      "\nSummary Count:",
      selectedRecentSummaries.length,
      "\nSummaries:",
      selectedRecentSummaries,
      "\nReserved Tokens:",
      reservedRecentMemoryTokens,
      "\nConsumed Tokens:",
      consumedRecentMemoryTokens
    );
  }

  // Select similar summaries
  let reservedSimilarMemoryTokens = Math.floor(
    availableMemoryTokens * settings.similarMemoryRatio
  );
  let consumedSimilarMemoryTokens = 0;

  if (settings.similarMemoryRatio > 0) {
    const selectedSimilarSummaries: Summary[] = [];

    // Utilize unused token space from recent selection
    if (randomMemoryRatio <= 0) {
      const unusedRecentTokens =
        reservedRecentMemoryTokens - consumedRecentMemoryTokens;

      reservedSimilarMemoryTokens += unusedRecentTokens;
      console.log(
        logPrefix,
        "Additional available token space for similar memory:",
        "\nFrom recent:",
        unusedRecentTokens
      );
    }

    // Target only summaries that haven't been selected yet
    const unusedSummaries = data.summaries.filter(
      (e) => !selectedSummaries.includes(e)
    );

    // Dynamically generate embedding texts
    const ebdTexts: EmbeddingText<Summary>[] = unusedSummaries.flatMap(
      (summary) => {
        const splitted = summary.text
          .split("\n\n")
          .filter((e) => e.trim().length > 0);

        return splitted.map((e) => ({
          content: e.trim(),
          metadata: summary,
        }));
      }
    );

    // Initialize embedding processor
    const processor = new HypaProcessorV2<Summary>({
      rateLimiter: new TaskRateLimiter({
        tasksPerMinute: settings.embeddingRequestsPerMinute,
        maxConcurrentTasks: settings.embeddingMaxConcurrent,
      }),
    });

    processor.progressCallback = (queuedCount) => {
      hypaV3ProgressStore.set({
        open: true,
        miniMsg: `${queuedCount}`,
        msg: `${logPrefix} Similarity searching...`,
        subMsg: `${queuedCount} queued`,
      });
    };

    try {
      // Start of performance measurement: addTexts
      console.log(
        `${logPrefix} Starting addTexts with ${ebdTexts.length} chunks`
      );
      const addStartTime = performance.now();

      // Add EmbeddingTexts to processor for similarity search
      await processor.addTexts(ebdTexts);

      const addEndTime = performance.now();
      console.debug(
        `${logPrefix} addTexts completed in ${addEndTime - addStartTime}ms`
      );
      // End of performance measurement: addTexts
    } catch (error) {
      return {
        currentTokens,
        chats,
        error: `${logPrefix} Similarity search failed: ${error}`,
        memory: toSerializableHypaV3Data(data),
      };
    } finally {
      hypaV3ProgressStore.set({
        open: false,
        miniMsg: "",
        msg: "",
        subMsg: "",
      });
    }

    const recentChats = chats
      .slice(-minChatsForSimilarity)
      .filter((chat) => chat.content.trim().length > 0);
    const queries: string[] = recentChats.flatMap((chat) => {
      return chat.content.split("\n\n").filter((e) => e.trim().length > 0);
    });

    if (queries.length > 0) {
      const scoredSummaries = new Map<Summary, number>();

      try {
        // Start of performance measurement: similarity search
        console.log(
          `${logPrefix} Starting similarity search with ${queries.length} queries`
        );
        const searchStartTime = performance.now();

        const batchScoredResults = await processor.similaritySearchScoredBatch(
          queries
        );

        const searchEndTime = performance.now();
        console.debug(
          `${logPrefix} Similarity search completed in ${
            searchEndTime - searchStartTime
          }ms`
        );
        // End of performance measurement: similarity search

        for (const scoredResults of batchScoredResults) {
          for (const [ebdResult, similarity] of scoredResults) {
            const summary = ebdResult.metadata;

            scoredSummaries.set(
              summary,
              (scoredSummaries.get(summary) || 0) + similarity
            );
          }
        }
      } catch (error) {
        return {
          currentTokens,
          chats,
          error: `${logPrefix} Similarity search failed: ${error}`,
          memory: toSerializableHypaV3Data(data),
        };
      } finally {
        hypaV3ProgressStore.set({
          open: false,
          miniMsg: "",
          msg: "",
          subMsg: "",
        });
      }

      // Normalize scores
      if (scoredSummaries.size > 0) {
        const maxScore = Math.max(...scoredSummaries.values());

        for (const [summary, score] of scoredSummaries.entries()) {
          scoredSummaries.set(summary, score / maxScore);
        }
      }

      // Sort in descending order
      const scoredArray = [...scoredSummaries.entries()].sort(
        ([, scoreA], [, scoreB]) => scoreB - scoreA
      );

      while (scoredArray.length > 0) {
        const [summary] = scoredArray.shift();
        const summaryTokens = await tokenizer.tokenizeChat({
          role: "system",
          content: summary.text + summarySeparator,
        });

        /*
        console.log(
          logPrefix,
          "Trying to add similar summary:",
          "\nSummary Tokens:",
          summaryTokens,
          "\nConsumed Similar Memory Tokens:",
          consumedSimilarMemoryTokens,
          "\nReserved Tokens:",
          reservedSimilarMemoryTokens,
          "\nWould exceed:",
          summaryTokens + consumedSimilarMemoryTokens >
            reservedSimilarMemoryTokens
        );
        */

        if (
          summaryTokens + consumedSimilarMemoryTokens >
          reservedSimilarMemoryTokens
        ) {
          console.log(
            logPrefix,
            "Stopping similar memory selection:",
            `\nconsumedSimilarMemoryTokens(${consumedSimilarMemoryTokens}) + summaryTokens(${summaryTokens}) > reservedSimilarMemoryTokens(${reservedSimilarMemoryTokens})`
          );
          break;
        }

        selectedSimilarSummaries.push(summary);
        consumedSimilarMemoryTokens += summaryTokens;
      }

      selectedSummaries.push(...selectedSimilarSummaries);
    }

    console.log(
      logPrefix,
      "After similar memory selection:",
      "\nSummary Count:",
      selectedSimilarSummaries.length,
      "\nSummaries:",
      selectedSimilarSummaries,
      "\nReserved Tokens:",
      reservedSimilarMemoryTokens,
      "\nConsumed Tokens:",
      consumedSimilarMemoryTokens
    );
  }

  // Select random summaries
  let reservedRandomMemoryTokens = Math.floor(
    availableMemoryTokens * randomMemoryRatio
  );
  let consumedRandomMemoryTokens = 0;

  if (randomMemoryRatio > 0) {
    const selectedRandomSummaries: Summary[] = [];

    // Utilize unused token space from recent and similar selection
    const unusedRecentTokens =
      reservedRecentMemoryTokens - consumedRecentMemoryTokens;
    const unusedSimilarTokens =
      reservedSimilarMemoryTokens - consumedSimilarMemoryTokens;

    reservedRandomMemoryTokens += unusedRecentTokens + unusedSimilarTokens;
    console.log(
      logPrefix,
      "Additional available token space for random memory:",
      "\nFrom recent:",
      unusedRecentTokens,
      "\nFrom similar:",
      unusedSimilarTokens,
      "\nTotal added:",
      unusedRecentTokens + unusedSimilarTokens
    );

    // Target only summaries that haven't been selected yet
    const unusedSummaries = data.summaries
      .filter((e) => !selectedSummaries.includes(e))
      .sort(() => Math.random() - 0.5); // Random shuffle

    for (const summary of unusedSummaries) {
      const summaryTokens = await tokenizer.tokenizeChat({
        role: "system",
        content: summary.text + summarySeparator,
      });

      if (
        summaryTokens + consumedRandomMemoryTokens >
        reservedRandomMemoryTokens
      ) {
        // Skip and keep trying to select more random memory
        continue;
      }

      selectedRandomSummaries.push(summary);
      consumedRandomMemoryTokens += summaryTokens;
    }

    selectedSummaries.push(...selectedRandomSummaries);

    console.log(
      logPrefix,
      "After random memory selection:",
      "\nSummary Count:",
      selectedRandomSummaries.length,
      "\nSummaries:",
      selectedRandomSummaries,
      "\nReserved Tokens:",
      reservedRandomMemoryTokens,
      "\nConsumed Tokens:",
      consumedRandomMemoryTokens
    );
  }

  // Sort selected summaries chronologically (by index)
  selectedSummaries.sort(
    (a, b) => data.summaries.indexOf(a) - data.summaries.indexOf(b)
  );

  // Generate final memory prompt
  const memory = wrapWithXml(
    memoryPromptTag,
    selectedSummaries.map((e) => e.text).join(summarySeparator)
  );
  const realMemoryTokens = await tokenizer.tokenizeChat({
    role: "system",
    content: memory,
  });

  // Release reserved memory tokens
  if (shouldReserveMemoryTokens) {
    currentTokens -= memoryTokens;
  }

  currentTokens += realMemoryTokens;

  console.log(
    logPrefix,
    "Final memory selection:",
    "\nSummary Count:",
    selectedSummaries.length,
    "\nSummaries:",
    selectedSummaries,
    "\nReal Memory Tokens:",
    realMemoryTokens,
    "\nAvailable Memory Tokens:",
    availableMemoryTokens
  );

  if (currentTokens > maxContextTokens) {
    throw new Error(
      `Unexpected error: input token count (${currentTokens}) exceeds max context size (${maxContextTokens})`
    );
  }

  // Save last selected summaries
  data.lastSelectedSummaries = selectedSummaries.map((selectedSummary) =>
    data.summaries.findIndex((summary) => summary === selectedSummary)
  );

  const newChats: OpenAIChat[] = [
    {
      role: "system",
      content: memory,
      memo: "supaMemory",
    },
    ...chats.slice(startIdx),
  ];

  console.log(
    logPrefix,
    "Exiting function:",
    "\nCurrent Tokens:",
    currentTokens,
    "\nAll chats, including memory prompt:",
    newChats,
    "\nMemory Data:",
    data
  );

  return {
    currentTokens,
    chats: newChats,
    memory: toSerializableHypaV3Data(data),
  };
}

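/**
 * Stable implementation: summarizes one batch at a time, awaiting each result
 * before continuing, and always injects a memory system prompt, even when no
 * summaries exist yet.
 */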
async function hypaMemoryV3Main(
  chats: OpenAIChat[],
  currentTokens: number,
  maxContextTokens: number,
  room: Chat,
  char: character | groupChat,
  tokenizer: ChatTokenizer
): Promise<HypaV3Result> {
  const db = getDatabase();
  const settings = getCurrentHypaV3Preset().settings;

  // Validate settings
  if (settings.recentMemoryRatio + settings.similarMemoryRatio > 1) {
    return {
      currentTokens,
      chats,
      error: `${logPrefix} The sum of Recent Memory Ratio and Similar Memory Ratio is greater than 1.`,
    };
  }

  // Initial token correction
  currentTokens -= db.maxResponse;

  // Load existing hypa data if available
  let data: HypaV3Data = {
    summaries: [],
    lastSelectedSummaries: [],
  };

  if (room.hypaV3Data) {
    data = toHypaV3Data(room.hypaV3Data);
  }

  // Clean orphaned summaries
  if (!settings.preserveOrphanedMemory) {
    cleanOrphanedSummary(chats, data);
  }

  // Determine starting index
  let startIdx = 0;

  if (data.summaries.length > 0) {
    const lastSummary = data.summaries.at(-1);
    const lastChatIndex = chats.findIndex(
      (chat) => chat.memo === [...lastSummary.chatMemos].at(-1)
    );

    if (lastChatIndex !== -1) {
      startIdx = lastChatIndex + 1;

      // Exclude tokens from summarized chats
      const summarizedChats = chats.slice(0, lastChatIndex + 1);
      for (const chat of summarizedChats) {
        currentTokens -= await tokenizer.tokenizeChat(chat);
      }
    }
  }

  console.log(logPrefix, "Starting index:", startIdx);

  // Reserve memory tokens
  const emptyMemoryTokens = await tokenizer.tokenizeChat({
    role: "system",
    content: wrapWithXml(memoryPromptTag, ""),
  });
  const memoryTokens = Math.floor(
    maxContextTokens * settings.memoryTokensRatio
  );
  const shouldReserveEmptyMemoryTokens =
    data.summaries.length === 0 &&
    currentTokens + emptyMemoryTokens <= maxContextTokens;
  let availableMemoryTokens = shouldReserveEmptyMemoryTokens
    ? 0
    : memoryTokens - emptyMemoryTokens;

  if (shouldReserveEmptyMemoryTokens) {
    currentTokens += emptyMemoryTokens;
    console.log(logPrefix, "Reserved empty memory tokens:", emptyMemoryTokens);
  } else {
    currentTokens += memoryTokens;
    console.log(logPrefix, "Reserved max memory tokens:", memoryTokens);
  }

  // If summarization is needed
  const summarizationMode = currentTokens > maxContextTokens;
  const targetTokens =
    maxContextTokens * (1 - settings.extraSummarizationRatio);

  while (summarizationMode) {
    if (currentTokens <= targetTokens) {
      break;
    }

    if (chats.length - startIdx <= minChatsForSimilarity) {
      if (currentTokens <= maxContextTokens) {
        break;
      } else {
        return {
          currentTokens,
          chats,
          error: `${logPrefix} Cannot summarize further: input token count (${currentTokens}) exceeds max context size (${maxContextTokens}), but minimum ${minChatsForSimilarity} messages required.`,
          memory: toSerializableHypaV3Data(data),
        };
      }
    }

    const toSummarize: OpenAIChat[] = [];
    const endIdx = Math.min(
      startIdx + settings.maxChatsPerSummary,
      chats.length - minChatsForSimilarity
    );
    let toSummarizeTokens = 0;

    console.log(
      logPrefix,
      "Evaluating summarization batch:",
      "\nCurrent Tokens:",
      currentTokens,
      "\nMax Context Tokens:",
      maxContextTokens,
      "\nStart Index:",
      startIdx,
      "\nEnd Index:",
      endIdx,
      "\nChat Count:",
      endIdx - startIdx,
      "\nMax Chats Per Summary:",
      settings.maxChatsPerSummary
    );

    for (let i = startIdx; i < endIdx; i++) {
      const chat = chats[i];
      const chatTokens = await tokenizer.tokenizeChat(chat);

      console.log(
        logPrefix,
        "Evaluating chat:",
        "\nIndex:",
        i,
        "\nRole:",
        chat.role,
        "\nContent:",
        "\n" + chat.content,
        "\nTokens:",
        chatTokens
      );

      toSummarizeTokens += chatTokens;

      if (
        chat.name === "example_user" ||
        chat.name === "example_assistant" ||
        chat.memo === "NewChatExample"
      ) {
        console.log(logPrefix, `Skipping example chat at index ${i}`);
        continue;
      }

      if (chat.memo === "NewChat") {
        console.log(logPrefix, `Skipping new chat at index ${i}`);
        continue;
      }

      if (chat.content.trim().length === 0) {
        console.log(logPrefix, `Skipping empty chat at index ${i}`);
        continue;
      }

      if (settings.doNotSummarizeUserMessage && chat.role === "user") {
        console.log(logPrefix, `Skipping user role at index ${i}`);
        continue;
      }

      toSummarize.push(chat);
    }

    // Stop summarization if further reduction would go below target tokens (unless we're over max tokens)
    if (
      currentTokens <= maxContextTokens &&
      currentTokens - toSummarizeTokens < targetTokens
    ) {
      console.log(
        logPrefix,
        "Stopping summarization:",
        `\ncurrentTokens(${currentTokens}) - toSummarizeTokens(${toSummarizeTokens}) < targetTokens(${targetTokens})`
      );
      break;
    }

    // Attempt summarization
    if (toSummarize.length > 0) {
      console.log(
        logPrefix,
        "Attempting summarization:",
        "\nTarget:",
        toSummarize
      );

      try {
        const summarizeResult = await summarize(toSummarize);

        data.summaries.push({
          text: summarizeResult,
          chatMemos: new Set(toSummarize.map((chat) => chat.memo)),
          isImportant: false,
        });
      } catch (error) {
        console.log(logPrefix, "Summarization failed:", `\n${error}`);

        return {
          currentTokens,
          chats,
          error: `${logPrefix} Summarization failed: ${error}`,
          memory: toSerializableHypaV3Data(data),
        };
      }
    }

    currentTokens -= toSummarizeTokens;
    startIdx = endIdx;
  }

  console.log(
    logPrefix,
    `${summarizationMode ? "Completed" : "Skipped"} summarization phase:`,
    "\nCurrent Tokens:",
    currentTokens,
    "\nMax Context Tokens:",
    maxContextTokens,
    "\nAvailable Memory Tokens:",
    availableMemoryTokens
  );

  // Early return if no summaries
  if (data.summaries.length === 0) {
    // Generate final memory prompt
    const memory = wrapWithXml(memoryPromptTag, "");

    const newChats: OpenAIChat[] = [
      {
        role: "system",
        content: memory,
        memo: "supaMemory",
      },
      ...chats.slice(startIdx),
    ];

    console.log(
      logPrefix,
      "Exiting function:",
      "\nCurrent Tokens:",
      currentTokens,
      "\nAll chats, including memory prompt:",
      newChats,
      "\nMemory Data:",
      data
    );

    return {
      currentTokens,
      chats: newChats,
      memory: toSerializableHypaV3Data(data),
    };
  }

  const selectedSummaries: Summary[] = [];
  const randomMemoryRatio =
    1 - settings.recentMemoryRatio - settings.similarMemoryRatio;

  // Select important summaries
  {
    const selectedImportantSummaries: Summary[] = [];

    for (const summary of data.summaries) {
      if (summary.isImportant) {
        const summaryTokens = await tokenizer.tokenizeChat({
          role: "system",
          content: summary.text + summarySeparator,
        });

        if (summaryTokens > availableMemoryTokens) {
          break;
        }

        selectedImportantSummaries.push(summary);

        availableMemoryTokens -= summaryTokens;
      }
    }

    selectedSummaries.push(...selectedImportantSummaries);

    console.log(
      logPrefix,
      "After important memory selection:",
      "\nSummary Count:",
      selectedImportantSummaries.length,
      "\nSummaries:",
      selectedImportantSummaries,
      "\nAvailable Memory Tokens:",
      availableMemoryTokens
    );
  }

  // Select recent summaries
  const reservedRecentMemoryTokens = Math.floor(
    availableMemoryTokens * settings.recentMemoryRatio
  );
  let consumedRecentMemoryTokens = 0;

  if (settings.recentMemoryRatio > 0) {
    const selectedRecentSummaries: Summary[] = [];

    // Target only summaries that haven't been selected yet
    const unusedSummaries = data.summaries.filter(
      (e) => !selectedSummaries.includes(e)
    );

    // Add one by one from the end
    for (let i = unusedSummaries.length - 1; i >= 0; i--) {
      const summary = unusedSummaries[i];
      const summaryTokens = await tokenizer.tokenizeChat({
        role: "system",
        content: summary.text + summarySeparator,
      });

      if (
        summaryTokens + consumedRecentMemoryTokens >
        reservedRecentMemoryTokens
      ) {
        break;
      }

      selectedRecentSummaries.push(summary);
      consumedRecentMemoryTokens += summaryTokens;
    }

    selectedSummaries.push(...selectedRecentSummaries);

    console.log(
      logPrefix,
      "After recent memory selection:",
      "\nSummary Count:",
      selectedRecentSummaries.length,
      "\nSummaries:",
      selectedRecentSummaries,
      "\nReserved Tokens:",
      reservedRecentMemoryTokens,
      "\nConsumed Tokens:",
      consumedRecentMemoryTokens
    );
  }

  // Select similar summaries
  let reservedSimilarMemoryTokens = Math.floor(
    availableMemoryTokens * settings.similarMemoryRatio
  );
  let consumedSimilarMemoryTokens = 0;

  if (settings.similarMemoryRatio > 0) {
    const selectedSimilarSummaries: Summary[] = [];

    // Utilize unused token space from recent selection
    if (randomMemoryRatio <= 0) {
      const unusedRecentTokens =
        reservedRecentMemoryTokens - consumedRecentMemoryTokens;

      reservedSimilarMemoryTokens += unusedRecentTokens;
      console.log(
        logPrefix,
        "Additional available token space for similar memory:",
        "\nFrom recent:",
        unusedRecentTokens
      );
    }

    // Target only summaries that haven't been selected yet
    const unusedSummaries = data.summaries.filter(
      (e) => !selectedSummaries.includes(e)
    );

    // Dynamically generate summary chunks
    const summaryChunks: SummaryChunk[] = [];

    unusedSummaries.forEach((summary) => {
      const splitted = summary.text
        .split("\n\n")
        .filter((e) => e.trim().length > 0);

      summaryChunks.push(
        ...splitted.map((e) => ({
          text: e.trim(),
          summary,
        }))
      );
    });

    // Initialize embedding processor
    const processor = new HypaProcesserEx(db.hypaModel);
    processor.oaikey = db.supaMemoryKey;

    // Add summaryChunks to processor for similarity search
    try {
      await processor.addSummaryChunks(summaryChunks);
    } catch (error) {
      return {
        currentTokens,
        chats,
        error: `${logPrefix} Similarity search failed: ${error}`,
        memory: toSerializableHypaV3Data(data),
      };
    }

    const scoredSummaries = new Map<Summary, number>();
    const recentChats = chats
      .slice(-minChatsForSimilarity)
      .filter((chat) => chat.content.trim().length > 0);

    if (recentChats.length > 0) {
      // Raw recent chat search
      const queries = recentChats.map((chat) => chat.content);

      if (settings.enableSimilarityCorrection && recentChats.length > 1) {
        // Raw + summarized recent chat search
        // Summarizing is only meaningful when there are at least two recent chats

        // Attempt summarization
        console.log(
          logPrefix,
          "Attempting summarization for similarity search:",
          "\nTarget:",
          recentChats
        );

        try {
          const summarizeResult = await summarize(recentChats);

          queries.push(summarizeResult);
        } catch (error) {
          console.log(logPrefix, "Summarization failed:", `\n${error}`);

          return {
            currentTokens,
            chats,
            error: `${logPrefix} Summarization failed: ${error}`,
            memory: toSerializableHypaV3Data(data),
          };
        }
      }

      try {
        for (const query of queries) {
          const scoredChunks = await processor.similaritySearchScoredEx(query);

          for (const [chunk, similarity] of scoredChunks) {
            const summary = chunk.summary;

            scoredSummaries.set(
              summary,
              (scoredSummaries.get(summary) || 0) + similarity
            );
          }
        }
      } catch (error) {
        return {
          currentTokens,
          chats,
          error: `${logPrefix} Similarity search failed: ${error}`,
          memory: toSerializableHypaV3Data(data),
        };
      }
    }

    // Sort in descending order
    const scoredArray = [...scoredSummaries.entries()].sort(
      ([, scoreA], [, scoreB]) => scoreB - scoreA
    );

    while (scoredArray.length > 0) {
      const [summary] = scoredArray.shift();
      const summaryTokens = await tokenizer.tokenizeChat({
        role: "system",
        content: summary.text + summarySeparator,
      });

      /*
      console.log(
        logPrefix,
        "Trying to add similar summary:",
        "\nSummary Tokens:",
        summaryTokens,
        "\nConsumed Similar Memory Tokens:",
        consumedSimilarMemoryTokens,
        "\nReserved Tokens:",
        reservedSimilarMemoryTokens,
        "\nWould exceed:",
        summaryTokens + consumedSimilarMemoryTokens >
          reservedSimilarMemoryTokens
      );
      */

      if (
        summaryTokens + consumedSimilarMemoryTokens >
        reservedSimilarMemoryTokens
      ) {
        console.log(
          logPrefix,
          "Stopping similar memory selection:",
          `\nconsumedSimilarMemoryTokens(${consumedSimilarMemoryTokens}) + summaryTokens(${summaryTokens}) > reservedSimilarMemoryTokens(${reservedSimilarMemoryTokens})`
        );
        break;
      }

      selectedSimilarSummaries.push(summary);
      consumedSimilarMemoryTokens += summaryTokens;
    }

    selectedSummaries.push(...selectedSimilarSummaries);

    console.log(
      logPrefix,
      "After similar memory selection:",
      "\nSummary Count:",
      selectedSimilarSummaries.length,
      "\nSummaries:",
      selectedSimilarSummaries,
      "\nReserved Tokens:",
      reservedSimilarMemoryTokens,
      "\nConsumed Tokens:",
      consumedSimilarMemoryTokens
    );
  }

  // Select random summaries
  let reservedRandomMemoryTokens = Math.floor(
    availableMemoryTokens * randomMemoryRatio
  );
  let consumedRandomMemoryTokens = 0;

  if (randomMemoryRatio > 0) {
    const selectedRandomSummaries: Summary[] = [];

    // Utilize unused token space from recent and similar selection
    const unusedRecentTokens =
      reservedRecentMemoryTokens - consumedRecentMemoryTokens;
    const unusedSimilarTokens =
      reservedSimilarMemoryTokens - consumedSimilarMemoryTokens;

    reservedRandomMemoryTokens += unusedRecentTokens + unusedSimilarTokens;
    console.log(
      logPrefix,
      "Additional available token space for random memory:",
      "\nFrom recent:",
      unusedRecentTokens,
      "\nFrom similar:",
      unusedSimilarTokens,
      "\nTotal added:",
      unusedRecentTokens + unusedSimilarTokens
    );

    // Target only summaries that haven't been selected yet
    const unusedSummaries = data.summaries
      .filter((e) => !selectedSummaries.includes(e))
      .sort(() => Math.random() - 0.5); // Random shuffle

    for (const summary of unusedSummaries) {
      const summaryTokens = await tokenizer.tokenizeChat({
        role: "system",
        content: summary.text + summarySeparator,
      });

      if (
        summaryTokens + consumedRandomMemoryTokens >
        reservedRandomMemoryTokens
      ) {
        // Skip and keep trying to select more random memory
        continue;
      }

      selectedRandomSummaries.push(summary);
      consumedRandomMemoryTokens += summaryTokens;
    }

    selectedSummaries.push(...selectedRandomSummaries);

    console.log(
      logPrefix,
      "After random memory selection:",
      "\nSummary Count:",
      selectedRandomSummaries.length,
      "\nSummaries:",
      selectedRandomSummaries,
      "\nReserved Tokens:",
      reservedRandomMemoryTokens,
      "\nConsumed Tokens:",
      consumedRandomMemoryTokens
    );
  }

  // Sort selected summaries chronologically (by index)
  selectedSummaries.sort(
    (a, b) => data.summaries.indexOf(a) - data.summaries.indexOf(b)
  );

  // Generate final memory prompt
  const memory = wrapWithXml(
    memoryPromptTag,
    selectedSummaries.map((e) => e.text).join(summarySeparator)
  );
  const realMemoryTokens = await tokenizer.tokenizeChat({
    role: "system",
    content: memory,
  });

  // Release reserved memory tokens
  if (shouldReserveEmptyMemoryTokens) {
    currentTokens -= emptyMemoryTokens;
  } else {
    currentTokens -= memoryTokens;
  }

  currentTokens += realMemoryTokens;

  console.log(
    logPrefix,
    "Final memory selection:",
    "\nSummary Count:",
    selectedSummaries.length,
    "\nSummaries:",
    selectedSummaries,
    "\nReal Memory Tokens:",
    realMemoryTokens,
    "\nAvailable Memory Tokens:",
    availableMemoryTokens
  );

  if (currentTokens > maxContextTokens) {
    throw new Error(
      `Unexpected error: input token count (${currentTokens}) exceeds max context size (${maxContextTokens})`
    );
  }

  // Save last selected summaries
  data.lastSelectedSummaries = selectedSummaries.map((selectedSummary) =>
    data.summaries.findIndex((summary) => summary === selectedSummary)
  );

  const newChats: OpenAIChat[] = [
    {
      role: "system",
      content: memory,
      memo: "supaMemory",
    },
    ...chats.slice(startIdx),
  ];

  console.log(
    logPrefix,
    "Exiting function:",
    "\nCurrent Tokens:",
    currentTokens,
    "\nAll chats, including memory prompt:",
    newChats,
    "\nMemory Data:",
    data
  );

  return {
    currentTokens,
    chats: newChats,
    memory: toSerializableHypaV3Data(data),
  };
}

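/**
 * Deserializes persisted HypaV3 data, rebuilding each summary's chatMemos Set.
 */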
function toHypaV3Data(serialData: SerializableHypaV3Data): HypaV3Data {
  return {
    ...serialData,
    summaries: serialData.summaries.map((summary) => ({
      ...summary,
      // Convert null back to undefined (JSON serialization converts undefined to null)
      chatMemos: new Set(
        summary.chatMemos.map((memo) => (memo === null ? undefined : memo))
      ),
    })),
  };
}

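/**
 * Converts in-memory HypaV3 data into a JSON-serializable form by flattening
 * each chatMemos Set into an array.
 */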
function toSerializableHypaV3Data(data: HypaV3Data): SerializableHypaV3Data {
  return {
    ...data,
    summaries: data.summaries.map((summary) => ({
      ...summary,
      chatMemos: [...summary.chatMemos],
    })),
  };
}

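/**
 * Drops summaries that reference chat messages no longer present in the
 * current chat (e.g. after messages were deleted).
 */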
function cleanOrphanedSummary(chats: OpenAIChat[], data: HypaV3Data): void {
  // Collect all memos from current chats
  const currentChatMemos = new Set(chats.map((chat) => chat.memo));
  const originalLength = data.summaries.length;

  // Keep only summaries whose chatMemos are a subset of the current chat memos
  data.summaries = data.summaries.filter((summary) => {
    return isSubset(summary.chatMemos, currentChatMemos);
  });

  const removedCount = originalLength - data.summaries.length;

  if (removedCount > 0) {
    console.log(logPrefix, `Cleaned ${removedCount} orphaned summaries.`);
  }
}

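// Returns true when every element of `subset` is also present in `superset`.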
function isSubset(subset: Set<string>, superset: Set<string>): boolean {
  for (const elem of subset) {
    if (!superset.has(elem)) {
      return false;
    }
  }

  return true;
}

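// Wraps content in an XML-style tag pair, e.g. <Past Events Summary>...</Past Events Summary>.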
function wrapWithXml(tag: string, content: string): string {
  return `<${tag}>\n${content}\n</${tag}>`;
}

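/**
 * Summarizes the given messages using either the configured sub model (via
 * requestChatData) or a local WebLLM model (via chatCompletion). The prompt's
 * "{{slot}}" placeholder is replaced with the stringified messages, and
 * <think> blocks are stripped from local model output.
 */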
export async function summarize(oaiMessages: OpenAIChat[]): Promise<string> {
  const db = getDatabase();
  const settings = getCurrentHypaV3Preset().settings;

  const strMessages = oaiMessages
    .map((chat) => `${chat.role}: ${chat.content}`)
    .join("\n");

  const summarizationPrompt =
    settings.summarizationPrompt.trim() === ""
      ? "[Summarize the ongoing role story. It must also remove redundancy and unnecessary text and content from the output.]"
      : settings.summarizationPrompt;

  const formated: OpenAIChat[] = parseChatML(
    summarizationPrompt.replaceAll("{{slot}}", strMessages)
  ) ?? [
    {
      role: "user",
      content: strMessages,
    },
    {
      role: "system",
      content: summarizationPrompt,
    },
  ];

  // API
  if (settings.summarizationModel === "subModel") {
    console.log(logPrefix, `Using aux model ${db.subModel} for summarization.`);

    const response = await requestChatData(
      {
        formated,
        bias: {},
        useStreaming: false,
        noMultiGen: true,
      },
      "memory"
    );

    if (response.type === "streaming" || response.type === "multiline") {
      throw new Error("Unexpected response type");
    }

    if (response.type === "fail") {
      throw new Error(response.result);
    }

    if (!response.result || response.result.trim().length === 0) {
      throw new Error("Empty summary returned");
    }

    return response.result.trim();
  }

  // Local
  const content = await chatCompletion(formated, settings.summarizationModel, {
    max_tokens: 8192,
    temperature: 0,
    extra_body: {
      enable_thinking: false,
    },
  });

  if (!content || content.trim().length === 0) {
    throw new Error("Empty summary returned");
  }

  // Remove think content
  const thinkRegex = /<think>[\s\S]*?<\/think>/g;

  return content.replace(thinkRegex, "").trim();
}

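/**
 * Returns the currently selected HypaV3 preset, throwing if the stored preset
 * id does not resolve to one.
 */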
export function getCurrentHypaV3Preset(): HypaV3Preset {
  const db = getDatabase();
  const preset = db.hypaV3Presets?.[db.hypaV3PresetId];

  if (!preset) {
    throw new Error("Preset not found. Please select a valid preset.");
  }

  return preset;
}

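/**
 * Creates a preset with default settings, shallow-merging any keys from
 * existingSettings whose value types agree with the defaults.
 */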
export function createHypaV3Preset(
  name = "New Preset",
  existingSettings = {}
): HypaV3Preset {
  const settings: HypaV3Settings = {
    summarizationModel: "subModel",
    summarizationPrompt: "",
    memoryTokensRatio: 0.2,
    extraSummarizationRatio: 0,
    maxChatsPerSummary: 6,
    recentMemoryRatio: 0.4,
    similarMemoryRatio: 0.4,
    enableSimilarityCorrection: false,
    preserveOrphanedMemory: false,
    processRegexScript: false,
    doNotSummarizeUserMessage: false,
    // Experimental
    useExperimentalImpl: false,
    summarizationRequestsPerMinute: 20,
    summarizationMaxConcurrent: 1,
    embeddingRequestsPerMinute: 100,
    embeddingMaxConcurrent: 1,
  };

  if (
    existingSettings &&
    typeof existingSettings === "object" &&
    !Array.isArray(existingSettings)
  ) {
    for (const [key, value] of Object.entries(existingSettings)) {
      if (key in settings && typeof value === typeof settings[key]) {
        settings[key] = value;
      }
    }
  }

  return {
    name,
    settings,
  };
}

interface SummaryChunkVector {
  chunk: SummaryChunk;
  vector: memoryVector;
}

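/**
 * Extends HypaProcesser so each SummaryChunk stays paired with its embedding
 * vector, letting similarity results be mapped back to their source Summary.
 */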
class HypaProcesserEx extends HypaProcesser {
  // Maintain references to SummaryChunks and their associated memoryVectors
  summaryChunkVectors: SummaryChunkVector[] = [];

  async addSummaryChunks(chunks: SummaryChunk[]): Promise<void> {
    // Maintain the superclass's caching structure by adding texts
    const texts = chunks.map((chunk) => chunk.text);

    await this.addText(texts);

    // Create new SummaryChunkVectors
    const newSummaryChunkVectors: SummaryChunkVector[] = [];

    for (const chunk of chunks) {
      const vector = this.vectors.find((v) => v.content === chunk.text);

      if (!vector) {
        throw new Error(
          `Failed to create vector for summary chunk:\n${chunk.text}`
        );
      }

      newSummaryChunkVectors.push({
        chunk,
        vector,
      });
    }

    // Append new SummaryChunkVectors to the existing collection
    this.summaryChunkVectors.push(...newSummaryChunkVectors);
  }

  async similaritySearchScoredEx(
    query: string
  ): Promise<[SummaryChunk, number][]> {
    const queryVector = (await this.getEmbeds(query))[0];

    return (
      this.summaryChunkVectors
        .map((scv) => ({
          chunk: scv.chunk,
          similarity: similarity(queryVector, scv.vector.embedding),
        }))
        // Sort by similarity in descending order (the previous comparator
        // returned only -1 or 0, which is not a consistent ordering)
        .sort((a, b) => b.similarity - a.similarity)
        .map((result) => [result.chunk, result.similarity])
    );
  }
}