HypaV2 Custom settings enhancement (#511)

# PR Checklist
- [x] Did you check if it works normally in all models? *Ignore this
when it doesn't use models.*
- [x] Did you check if it works normally in all of the web, local, and
node-hosted versions? If it doesn't, did you block it in those versions?
- [x] Did you add a type def?

# Description
~~No bugs; works as intended. However, the actual summary output hasn't been
checked yet. Very unstable; needs confirmation/checking.~~
### **No bugs; works as intended. It summarizes correctly, moving mainChunks
to chunks works well, and there are no re-summarization issues. It works!**


Added a new variable to database.ts that indicates whether HypaV2 Memory is
activated:
`hypav2: boolean`
This was added to prevent the memory types from overlapping when HypaV2 Memory
is selected and the supaMemory summarization model is changed.
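
Below is a minimal, self-contained sketch of how the two settings interact after this change. The field names follow the diffs below; the `activeMemory` helper itself is illustrative and is not code from this PR.

```ts
// Sketch only: field names mirror database.ts, the selection helper is hypothetical.
interface MemorySettings {
    hypav2: boolean        // new flag added in database.ts
    supaModelType: string  // SupaMemory summarization model, e.g. 'none' | 'curie' | 'instruct35' | 'subModel' | 'distilbart'
}

function activeMemory(db: MemorySettings): 'hypaV2' | 'supaMemory' | 'none' {
    if (db.hypav2) return 'hypaV2'                        // HypaV2 is toggled independently of the summarizer model
    if (db.supaModelType !== 'none') return 'supaMemory'  // otherwise the SupaMemory path applies
    return 'none'
}

// Changing the summarization model no longer deselects HypaV2:
console.log(activeMemory({ hypav2: true, supaModelType: 'instruct35' })) // 'hypaV2'
```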

Added supaMemoryType selection, HypaMemoryType selection, and a
supaMemoryPrompt editing section to OtherBotSettings.svelte, and implemented
them in hypav2.ts.
Also added the OpenAI key field for when the summarization model is GPT-3.5
Instruct.
Also proposed a `memoryAlgorithmType: string` variable in database.ts, so that
more memory types can be added later.
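
A rough sketch of how the new fields could sit in the Database interface. Only `hypav2` and `memoryAlgorithmType` appear in the database.ts diff below; the example values for `memoryAlgorithmType` are assumptions, not part of this PR.

```ts
// Sketch only: abbreviated interface, the real database.ts has many more fields.
export interface Database {
    // ...existing fields...
    supaMemoryKey: string
    supaModelType: string        // renamed from supaMemoryType in this PR
    hypaMemory: boolean
    hypav2: boolean              // whether HypaV2 Memory is active
    memoryAlgorithmType: string  // proposed; e.g. 'supa' | 'hypa' | 'hypaV2' (assumed values)
}
```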

![image](https://github.com/kwaroran/RisuAI/assets/73149145/5d167b03-d7e7-41a1-8875-1780561cd3ac)

Fixed minor punctuation and changed the summarize function of
hypav2.ts (same as the one in supaMemory.ts).
Authored by kwaroran on 2024-06-19 03:49:19 +09:00; committed by GitHub.
7 changed files with 305 additions and 178 deletions

View File

@@ -714,7 +714,7 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
currentTokens += await tokenizer.tokenizeChat(chat)
}
if(nowChatroom.supaMemory && (db.supaMemoryType !== 'none' || db.hanuraiEnable)){
if(nowChatroom.supaMemory && (db.supaModelType !== 'none' || db.hanuraiEnable || db.hypav2)){
chatProcessStage.set(2)
if(db.hanuraiEnable){
const hn = await hanuraiMemory(chats, {
@@ -730,9 +730,11 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
chats = hn.chats
currentTokens = hn.tokens
}
else if(db.supaMemoryType === 'hypaV2'){
else if(db.hypav2){ //HypaV2 support needs to be changed like this.
const sp = await hypaMemoryV2(chats, currentTokens, maxContextTokens, currentChat, nowChatroom, tokenizer)
console.log("All chats: ", chats)
if(sp.error){
console.log(sp)
alertError(sp.error)
return false
}

View File

@@ -4,209 +4,306 @@ import type { ChatTokenizer } from "src/ts/tokenizer";
import { get } from "svelte/store";
import { requestChatData } from "../request";
import { HypaProcesser } from "./hypamemory";
import { globalFetch } from "src/ts/storage/globalApi";
import { runSummarizer } from "../transformers";
import { last, remove } from "lodash";
export interface HypaV2Data{
export interface HypaV2Data {
chunks: {
text:string
targetId:string
}[]
text: string;
targetId: string;
}[];
mainChunks: {
text:string
targetId:string
}[]
text: string;
targetId: string;
}[];
}
async function summary(stringlizedChat: string): Promise<{ success: boolean; data: string }> {
const db = get(DataBase);
console.log("Summarizing");
async function summary(stringlizedChat:string):Promise<{
success:boolean
data:string
}>{
const promptbody:OpenAIChat[] = [
{
role: "user",
content: stringlizedChat
},
{
role: "system",
content: "Summarize this roleplay scene in a coherent narrative format for future reference. Summarize what happened, focusing on events and interactions between them. If someone or something is new or changed, include a brief characterization of them."
}
]
const da = await requestChatData({
formated: promptbody,
bias: {},
useStreaming: false,
noMultiGen: true
}, 'model')
if(da.type === 'fail' || da.type === 'streaming' || da.type === 'multiline'){
return {
data: "Hypamemory HTTP: " + da.result,
success: false
if (db.supaModelType === 'distilbart') {
try {
const sum = await runSummarizer(stringlizedChat);
return { success: true, data: sum };
} catch (error) {
return {
success: false,
data: "SupaMemory: Summarizer: " + `${error}`
};
}
}
return {
data: da.result,
success: true
const supaPrompt = db.supaMemoryPrompt === '' ?
"[Summarize the ongoing role story, It must also remove redundancy and unnecessary text and content from the output to reduce tokens for gpt3 and other sublanguage models]\n"
: db.supaMemoryPrompt;
let result = '';
if (db.supaModelType !== 'subModel') {
const promptbody = stringlizedChat + '\n\n' + supaPrompt + "\n\nOutput:";
const da = await globalFetch("https://api.openai.com/v1/completions", {
headers: {
"Content-Type": "application/json",
"Authorization": "Bearer " + db.supaMemoryKey
},
method: "POST",
body: {
"model": db.supaModelType === 'curie' ? "text-curie-001"
: db.supaModelType === 'instruct35' ? 'gpt-3.5-turbo-instruct'
: "text-davinci-003",
"prompt": promptbody,
"max_tokens": 600,
"temperature": 0
}
})
console.log("Using openAI instruct 3.5 for SupaMemory");
try {
if (!da.ok) {
return {
success: false,
data: "SupaMemory: HTTP: " + JSON.stringify(da)
};
}
result = (await da.data)?.choices[0]?.text?.trim();
if (!result) {
return {
success: false,
data: "SupaMemory: HTTP: " + JSON.stringify(da)
};
}
return { success: true, data: result };
} catch (error) {
return {
success: false,
data: "SupaMemory: HTTP: " + error
};
}
} else {
const promptbody: OpenAIChat[] = [
{
role: "user",
content: stringlizedChat
},
{
role: "system",
content: supaPrompt
}
];
console.log("Using submodel: ", db.subModel, "for supaMemory model");
const da = await requestChatData({
formated: promptbody,
bias: {},
useStreaming: false,
noMultiGen: true
}, 'submodel');
if (da.type === 'fail' || da.type === 'streaming' || da.type === 'multiline') {
return {
success: false,
data: "SupaMemory: HTTP: " + da.result
};
}
result = da.result;
}
return { success: true, data: result };
}
export async function hypaMemoryV2(
chats:OpenAIChat[],
currentTokens:number,
maxContextTokens:number,
room:Chat,
char:character|groupChat,
tokenizer:ChatTokenizer,
arg:{asHyper?:boolean} = {}
): Promise<{ currentTokens: number; chats: OpenAIChat[]; error?:string; memory?:HypaV2Data;}>{
chats: OpenAIChat[],
currentTokens: number,
maxContextTokens: number,
room: Chat,
char: character | groupChat,
tokenizer: ChatTokenizer,
arg: { asHyper?: boolean, summaryModel?: string, summaryPrompt?: string, hypaModel?: string } = {}
): Promise<{ currentTokens: number; chats: OpenAIChat[]; error?: string; memory?: HypaV2Data; }> {
const db = get(DataBase)
const db = get(DataBase);
const data: HypaV2Data = room.hypaV2Data ?? { chunks: [], mainChunks: [] };
const data:HypaV2Data = room.hypaV2Data ?? {
chunks:[],
mainChunks:[]
}
//this is for the prompt
let allocatedTokens = db.hypaAllocatedTokens;
let chunkSize = db.hypaChunkSize;
currentTokens += allocatedTokens + 50;
let mainPrompt = "";
const lastTwoChats = chats.slice(-2);
// Error handling for infinite summarization attempts
let summarizationFailures = 0;
const maxSummarizationFailures = 3;
let lastMainChunkTargetId = '';
let allocatedTokens = db.hypaAllocatedTokens
let chunkSize = db.hypaChunkSize
currentTokens += allocatedTokens
currentTokens += 50 //this is for the template prompt
let mainPrompt = ""
// Ensure correct targetId matching
const getValidChatIndex = (targetId: string) => {
return chats.findIndex(chat => chat.memo === targetId);
};
while(data.mainChunks.length > 0){
const chunk = data.mainChunks[0]
const ind = chats.findIndex(e => e.memo === chunk.targetId)
if(ind === -1){
data.mainChunks.shift()
continue
// Processing mainChunks
if (data.mainChunks.length > 0) {
const chunk = data.mainChunks[0];
const ind = getValidChatIndex(chunk.targetId);
if (ind !== -1) {
const removedChats = chats.splice(0, ind + 1);
console.log("removed chats", removedChats);
for (const chat of removedChats) {
currentTokens -= await tokenizer.tokenizeChat(chat);
}
mainPrompt = chunk.text;
const mpToken = await tokenizer.tokenizeChat({ role: 'system', content: mainPrompt });
allocatedTokens -= mpToken;
}
const removedChats = chats.splice(0, ind)
for(const chat of removedChats){
currentTokens -= await tokenizer.tokenizeChat(chat)
}
chats = chats.slice(ind)
mainPrompt = chunk.text
const mpToken = await tokenizer.tokenizeChat({role:'system', content:mainPrompt})
allocatedTokens -= mpToken
break
}
while(currentTokens >= maxContextTokens){
let idx = 0
let targetId = ''
const halfData:OpenAIChat[] = []
// Token management loop
while (currentTokens >= maxContextTokens) {
let idx = 0;
let targetId = '';
const halfData: OpenAIChat[] = [];
let halfDataTokens = 0
while(halfDataTokens < chunkSize){
const chat = chats[idx]
if(!chat){
break
}
halfDataTokens += await tokenizer.tokenizeChat(chat)
halfData.push(chat)
idx++
targetId = chat.memo
let halfDataTokens = 0;
while (halfDataTokens < chunkSize && (idx <= chats.length - 4)) { // Ensure latest two chats are not added to summarization.
const chat = chats[idx];
halfDataTokens += await tokenizer.tokenizeChat(chat);
halfData.push(chat);
idx++;
targetId = chat.memo;
console.log("current target chat: ", chat);
}
const stringlizedChat = halfData.map(e => `${e.role}: ${e.content}`).join('\n')
// Avoid summarizing the last two chats
if (halfData.length < 3) break;
const summaryData = await summary(stringlizedChat)
const stringlizedChat = halfData.map(e => `${e.role}: ${e.content}`).join('\n');
const summaryData = await summary(stringlizedChat);
if(!summaryData.success){
return {
currentTokens: currentTokens,
chats: chats,
error: summaryData.data
if (!summaryData.success) {
summarizationFailures++;
if (summarizationFailures >= maxSummarizationFailures) {
return {
currentTokens: currentTokens,
chats: chats,
error: "Summarization failed multiple times. Aborting to prevent infinite loop."
};
}
continue;
}
const summaryDataToken = await tokenizer.tokenizeChat({role:'system', content:summaryData.data})
mainPrompt += `\n\n${summaryData.data}`
currentTokens -= halfDataTokens
allocatedTokens -= summaryDataToken
summarizationFailures = 0; // Reset failure counter on success
const summaryDataToken = await tokenizer.tokenizeChat({ role: 'system', content: summaryData.data });
mainPrompt += `\n\n${summaryData.data}`;
currentTokens -= halfDataTokens;
allocatedTokens -= summaryDataToken;
data.mainChunks.unshift({
text: mainPrompt,
text: summaryData.data,
targetId: targetId
})
});
if(allocatedTokens < 1500){
const summarizedMp = await summary(mainPrompt)
const mpToken = await tokenizer.tokenizeChat({role:'system', content:mainPrompt})
const summaryToken = await tokenizer.tokenizeChat({role:'system', content:summarizedMp.data})
// Split the summary into chunks based on double line breaks
const splitted = summaryData.data.split('\n\n').map(e => e.trim()).filter(e => e.length > 0);
allocatedTokens -= summaryToken
allocatedTokens += mpToken
// Update chunks with the new summary
data.chunks.push(...splitted.map(e => ({
text: e,
targetId: targetId
})));
const splited = mainPrompt.split('\n\n').map(e => e.trim()).filter(e => e.length > 0)
data.chunks.push(...splited.map(e => ({
text: e,
targetId: targetId
})))
data.mainChunks[0].text = mainPrompt
}
// Remove summarized chats
chats.splice(0, idx);
}
const processer = new HypaProcesser("nomic")
await processer.addText(data.chunks.filter(v => {
return v.text.trim().length > 0
}).map((v) => {
return "search_document: " + v.text.trim()
}))
// Construct the mainPrompt from mainChunks until half of the allocatedTokens are used
mainPrompt = "";
let mainPromptTokens = 0;
for (const chunk of data.mainChunks) {
const chunkTokens = await tokenizer.tokenizeChat({ role: 'system', content: chunk.text });
if (mainPromptTokens + chunkTokens > allocatedTokens / 2) break;
mainPrompt += `\n\n${chunk.text}`;
mainPromptTokens += chunkTokens;
lastMainChunkTargetId = chunk.targetId;
}
let scoredResults:{[key:string]:number} = {}
for(let i=0;i<3;i++){
const pop = chats[chats.length - i - 1]
if(!pop){
break
// Fetch additional memory from chunks
const processor = new HypaProcesser(db.hypaModel);
processor.oaikey = db.supaMemoryKey;
// Find the smallest index of chunks with the same targetId as lastMainChunkTargetId
const lastMainChunkIndex = data.chunks.reduce((minIndex, chunk, index) => {
if (chunk.targetId === lastMainChunkTargetId) {
return Math.min(minIndex, index);
}
const searched = await processer.similaritySearchScored(`search_query: ${pop.content}`)
for(const result of searched){
const score = result[1]/(i+1)
if(scoredResults[result[0]]){
scoredResults[result[0]] += score
}else{
scoredResults[result[0]] = score
}
return minIndex;
}, data.chunks.length);
// Filter chunks to only include those older than the last mainChunk's targetId
const olderChunks = lastMainChunkIndex !== data.chunks.length
? data.chunks.slice(0, lastMainChunkIndex)
: data.chunks;
console.log("Older Chunks:", olderChunks);
// Add older chunks to processor for similarity search
await processor.addText(olderChunks.filter(v => v.text.trim().length > 0).map(v => "search_document: " + v.text.trim()));
let scoredResults: { [key: string]: number } = {};
for (let i = 0; i < 3; i++) {
const pop = chats[chats.length - i - 1];
if (!pop) break;
const searched = await processor.similaritySearchScored(`search_query: ${pop.content}`);
for (const result of searched) {
const score = result[1] / (i + 1);
scoredResults[result[0]] = (scoredResults[result[0]] || 0) + score;
}
}
const scoredArray = Object.entries(scoredResults).sort((a,b) => b[1] - a[1])
let chunkResultPrompts = ""
while(allocatedTokens > 0){
const target = scoredArray.shift()
if(!target){
break
}
const tokenized = await tokenizer.tokenizeChat({
role: 'system',
content: target[0].substring(14)
})
if(tokenized > allocatedTokens){
break
}
chunkResultPrompts += target[0].substring(14) + '\n\n'
allocatedTokens -= tokenized
const scoredArray = Object.entries(scoredResults).sort((a, b) => b[1] - a[1]);
let chunkResultPrompts = "";
let chunkResultTokens = 0;
while (allocatedTokens - mainPromptTokens - chunkResultTokens > 0 && scoredArray.length > 0) {
const [text] = scoredArray.shift();
const tokenized = await tokenizer.tokenizeChat({ role: 'system', content: text.substring(14) });
if (tokenized > allocatedTokens - mainPromptTokens - chunkResultTokens) break;
chunkResultPrompts += text.substring(14) + '\n\n';
chunkResultTokens += tokenized;
}
const fullResult = `<Past Events Summary>${mainPrompt}</Past Events Summary>\n<Past Events Details>${chunkResultPrompts}</Past Events Details>`
const fullResult = `<Past Events Summary>${mainPrompt}</Past Events Summary>\n<Past Events Details>${chunkResultPrompts}</Past Events Details>`;
chats.unshift({
role: "system",
content: fullResult,
memo: "supaMemory"
})
});
// Add the remaining chats after the last mainChunk's targetId
const lastTargetId = data.mainChunks.length > 0 ? data.mainChunks[0].targetId : null;
if (lastTargetId) {
const lastIndex = getValidChatIndex(lastTargetId);
if (lastIndex !== -1) {
const remainingChats = chats.slice(lastIndex + 1);
chats = [chats[0], ...remainingChats];
}
}
// Add last two chats if they exist and are not duplicates
if (lastTwoChats.length === 2) {
const [lastChat1, lastChat2] = lastTwoChats;
if (!chats.some(chat => chat.memo === lastChat1.memo)) {
chats.push(lastChat1);
}
if (!chats.some(chat => chat.memo === lastChat2.memo)) {
chats.push(lastChat2);
}
}
console.log("model being used: ", db.hypaModel, db.supaModelType, "\nCurrent session tokens: ", currentTokens, "\nAll chats, including memory system prompt: ", chats, "\nMemory data, with all the chunks: ", data);
return {
currentTokens: currentTokens,
chats: chats,
memory: data
}
}
};
}

View File

@@ -183,7 +183,7 @@ export async function supaMemory(
async function summarize(stringlizedChat:string){
if(db.supaMemoryType === 'distilbart'){
if(db.supaModelType === 'distilbart'){
try {
const sum = await runSummarizer(stringlizedChat)
return sum
@@ -204,7 +204,7 @@ export async function supaMemory(
let result = ''
if(db.supaMemoryType !== 'subModel'){
if(db.supaModelType !== 'subModel'){
const promptbody = stringlizedChat + '\n\n' + supaPrompt + "\n\nOutput:"
const da = await globalFetch("https://api.openai.com/v1/completions",{
@@ -214,8 +214,8 @@ export async function supaMemory(
},
method: "POST",
body: {
"model": db.supaMemoryType === 'curie' ? "text-curie-001"
: db.supaMemoryType === 'instruct35' ? 'gpt-3.5-turbo-instruct'
"model": db.supaModelType === 'curie' ? "text-curie-001"
: db.supaModelType === 'instruct35' ? 'gpt-3.5-turbo-instruct'
: "text-davinci-003",
"prompt": promptbody,
"max_tokens": 600,

View File

@@ -230,8 +230,8 @@ export function setDatabase(data:Database){
if(checkNullish(data.supaMemoryKey)){
data.supaMemoryKey = ""
}
if(checkNullish(data.supaMemoryType)){
data.supaMemoryType = "none"
if(checkNullish(data.supaModelType)){
data.supaModelType = "none"
}
if(checkNullish(data.askRemoval)){
data.askRemoval = true
@@ -527,7 +527,7 @@ export interface Database{
useStreaming:boolean
palmAPI:string,
supaMemoryKey:string
supaMemoryType:string
supaModelType:string
textScreenColor?:string
textBorder?:boolean
textScreenRounded?:boolean
@@ -569,6 +569,8 @@ export interface Database{
useAdditionalAssetsPreview:boolean,
usePlainFetch:boolean
hypaMemory:boolean
hypav2:boolean
memoryAlgorithmType:string // To enable new memory module/algorithms
proxyRequestModel:string
ooba:OobaSettings
ainconfig: AINsettings