[feat] hypamemory first commit

This commit is contained in:
kwaroran
2023-06-29 00:25:51 +09:00
parent 1026945996
commit 6dc105c69a
5 changed files with 727 additions and 33 deletions

View File

@@ -11,7 +11,7 @@ import { stableDiff } from "./stableDiff";
import { processScript, processScriptFull } from "./scripts";
import { exampleMessage } from "./exampleMessages";
import { sayTTS } from "./tts";
import { supaMemory } from "./supaMemory";
import { supaMemory } from "./memory/supaMemory";
import { v4 } from "uuid";
import { cloneDeep } from "lodash";
import { groupOrder } from "./group";

View File

@@ -0,0 +1,139 @@
import localforage from "localforage";
import { similarity } from "ml-distance";
import { globalFetch } from "src/ts/storage/globalApi";
/**
 * Embedding-backed text memory.
 *
 * Texts are embedded through the OpenAI embeddings endpoint, kept in memory in
 * `vectors`, and persisted in a localforage instance named "hypaVector" (keyed by
 * the text itself) so the same text is not embedded twice.
 */
export class HypaProcesser{
    /** API key sent as the Bearer token by `getEmbeds`. Not set by the constructor. */
    oaikey:string
    /** Vectors currently loaded in memory; this is what the similarity search scans. */
    vectors:memoryVector[] = []
    /** Persistent store of previously computed vectors, keyed by their text. */
    forage:LocalForage

    constructor(){
        this.forage = localforage.createInstance({
            name: "hypaVector"
        })
    }

    /**
     * Embeds every text, sending the inputs in batches of at most 512 per request.
     *
     * @param texts Texts to embed.
     * @returns One embedding per input text, in input order.
     */
    async embedDocuments(texts: string[]): Promise<number[][]> {
        const embeddings: number[][] = [];
        for (const batch of chunkArray(texts, 512)) {
            embeddings.push(...(await this.getEmbeds(batch)));
        }
        return embeddings;
    }

    /**
     * Requests embeddings for one text or one batch of texts using the
     * "text-embedding-ada-002" model.
     *
     * @param input A single text or an array of texts.
     * @returns The `embedding` of every entry in the response's `data` array.
     * @throws The response payload (`gf.data`) as-is when the request is not ok.
     */
    async getEmbeds(input:string[]|string): Promise<number[][]> {
        const gf = await globalFetch("https://api.openai.com/v1/embeddings", {
            headers: {
                "Authorization": "Bearer " + this.oaikey
            },
            body: {
                "input": input,
                "model": "text-embedding-ada-002"
            }
        })
        if(!gf.ok){
            throw gf.data
        }
        const data = gf.data
        const result:number[][] = []
        for(const item of data.data){
            result.push(item.embedding)
        }
        return result
    }

    /**
     * Loads the given texts into `vectors`.
     *
     * Texts already in memory (or repeated in `texts`) are skipped, texts found in
     * the persistent store are reused and flagged `alreadySaved`, and only the
     * remaining texts are embedded and written to the store.
     *
     * @param texts Texts to make searchable.
     */
    async addText(texts:string[]) {
        // Tracks every content already handled so nothing is added to `vectors` twice.
        const known = new Set(this.vectors.map((vec) => vec.content))
        const pending:string[] = []

        for(const text of texts){
            if(known.has(text)){
                continue
            }
            known.add(text)
            const cached:memoryVector | null = await this.forage.getItem(text)
            if(cached){
                cached.alreadySaved = true
                this.vectors.push(cached)
            }
            else{
                pending.push(text)
            }
        }

        if(pending.length === 0){
            return
        }

        const embeddings = await this.embedDocuments(pending)
        const fresh:memoryVector[] = embeddings.map((embedding, idx) => ({
            content: pending[idx],
            embedding
        }));
        for(const vec of fresh){
            // Freshly embedded vectors are never in the store yet, so persist all of them.
            await this.forage.setItem(vec.content, vec)
        }
        this.vectors = fresh.concat(this.vectors)
    }

    /**
     * Embeds `query` and returns the stored texts ordered from most to least similar.
     *
     * @param query Text to search for.
     * @returns Contents of all loaded vectors, best match first.
     */
    async similaritySearch(query: string) {
        const [queryEmbedding] = await this.getEmbeds(query)
        const results = await this.similaritySearchVectorWithScore(queryEmbedding);
        return results.map((result) => result[0]);
    }

    /**
     * Scores every loaded vector against `query` by cosine similarity.
     *
     * @param query Embedding to compare against.
     * @returns `[content, similarity]` pairs sorted by descending similarity.
     */
    async similaritySearchVectorWithScore(
        query: number[],
    ): Promise<[string, number][]> {
        const scored = this.vectors.map((vector): [string, number] => [
            vector.content,
            similarity.cosine(query, vector.embedding),
        ])
        // A numeric difference gives a consistent comparator; returning only -1/0 does not.
        scored.sort((a, b) => b[1] - a[1])
        return scored;
    }
}
/** A text together with its embedding, as held in memory and persisted by HypaProcesser. */
type memoryVector = {
// Embedding vector for `content`; compared against the query with cosine similarity.
embedding:number[]
// The embedded text; also used as the key when the vector is read from / written to the store.
content:string,
// Set to true when the vector was loaded from the persistent store rather than freshly embedded.
alreadySaved?:boolean
}
/**
 * Splits `arr` into consecutive chunks of at most `chunkSize` elements.
 *
 * The input array is not modified; the last chunk may be shorter than `chunkSize`.
 *
 * @param arr Elements to split.
 * @param chunkSize Maximum number of elements per chunk; must be a positive integer.
 * @returns The chunks in original order (an empty array for an empty input).
 * @throws RangeError when `chunkSize` is not a positive integer.
 */
const chunkArray = <T>(arr: T[], chunkSize: number): T[][] => {
    // A zero, negative or fractional size has no sensible chunking, so fail loudly.
    if (!Number.isInteger(chunkSize) || chunkSize <= 0) {
        throw new RangeError(`chunkSize must be a positive integer, got ${chunkSize}`);
    }
    const chunks: T[][] = [];
    for (let start = 0; start < arr.length; start += chunkSize) {
        chunks.push(arr.slice(start, start + chunkSize));
    }
    return chunks;
};

View File

@@ -1,8 +1,11 @@
import { get } from "svelte/store";
import type { OpenAIChat } from ".";
import { DataBase, type Chat, type character, type groupChat } from "../storage/database";
import { tokenize, type ChatTokenizer } from "../tokenizer";
import { requestChatData } from "./request";
import type { OpenAIChat } from "..";
import { DataBase, type Chat, type character, type groupChat } from "../../storage/database";
import { tokenize, type ChatTokenizer } from "../../tokenizer";
import { requestChatData } from "../request";
import { cloneDeep } from "lodash";
import { HypaProcesser } from "./hypamemory";
import { stringlizeChat } from "../stringlize";
export async function supaMemory(
chats:OpenAIChat[],
@@ -10,7 +13,8 @@ export async function supaMemory(
maxContextTokens:number,
room:Chat,
char:character|groupChat,
tokenizer:ChatTokenizer
tokenizer:ChatTokenizer,
arg:{asHyper?:boolean} = {}
): Promise<{ currentTokens: number; chats: OpenAIChat[]; error?:string; memory?:string;lastId?:string}>{
const db = get(DataBase)
@@ -32,33 +36,98 @@ export async function supaMemory(
}
let supaMemory = ''
let hypaChunks:string[] = []
let lastId = ''
let HypaData:HypaData[] = []
if(room.supaMemoryData && room.supaMemoryData.length > 4){
const splited = room.supaMemoryData.split('\n')
const id = splited.splice(0,1)[0]
let id = splited.splice(0,1)[0]
const data = splited.join('\n')
let i =0;
while(true){
if(chats.length === 0){
return {
currentTokens: currentTokens,
chats: chats,
error: "SupaMemory: chat ID not found"
}
}
if(chats[0].memo === id){
lastId = id
break
}
currentTokens -= await tokenizer.tokenizeChat(chats[0])
chats.splice(0, 1)
i += 1
}
if(arg.asHyper && (!id.startsWith("hypa:"))){
supaMemory = ""
supaMemory = data
currentTokens += await tokenize(supaMemory)
}
else{
if(id.startsWith("hypa:")){
if((!arg.asHyper)){
return {
currentTokens: currentTokens,
chats: chats,
error: "SupaMemory: Data saved in hypaMemory, loaded as SupaMemory."
}
}
HypaData = JSON.parse(data.substring(0,5).trim())
if(!Array.isArray(HypaData)){
return {
currentTokens: currentTokens,
chats: chats,
error: "hypaMemory: hypaMemory isn't Array"
}
}
let indexSelected = -1
for(let i=0;i<HypaData.length;i++){
let i =0;
let countTokens = currentTokens
let countChats = cloneDeep(chats)
while(true){
if(countChats.length === 0){
break
}
if(countChats[0].memo === HypaData[i].id){
lastId = HypaData[i].id
currentTokens = countTokens
chats = countChats
indexSelected = i
break
}
countTokens -= await tokenizer.tokenizeChat(countChats[0])
countChats.splice(0, 1)
i += 1
}
if(indexSelected !== -1){
break
}
}
if(indexSelected === -1){
return {
currentTokens: currentTokens,
chats: chats,
error: "hypaMemory: chat ID not found"
}
}
supaMemory = HypaData[indexSelected].supa
hypaChunks = HypaData[indexSelected].hypa
}
else{
let i =0;
while(true){
if(chats.length === 0){
return {
currentTokens: currentTokens,
chats: chats,
error: "SupaMemory: chat ID not found"
}
}
if(chats[0].memo === id){
lastId = id
break
}
currentTokens -= await tokenizer.tokenizeChat(chats[0])
chats.splice(0, 1)
i += 1
}
supaMemory = data
currentTokens += await tokenize(supaMemory)
}
}
}
@@ -135,6 +204,20 @@ export async function supaMemory(
return result
}
let hypaResult = ""
if(arg.asHyper){
const hypa = new HypaProcesser()
await hypa.addText(hypaChunks)
const filteredChat = chats.filter((r) => r.role !== 'system' && r.role !== 'function')
const s = await hypa.similaritySearch(stringlizeChat(filteredChat.slice(0, 4)))
hypaResult = s.slice(0,4).join("\n\n")
currentTokens += await tokenizer.tokenizeChat({
role: "assistant",
content: hypaResult
})
}
while(currentTokens > maxContextTokens){
const beforeToken = currentTokens
let maxChunkSize = maxContextTokens > 3500 ? 1200 : Math.floor(maxContextTokens / 3)
@@ -181,7 +264,11 @@ export async function supaMemory(
const tokens = await tokenizer.tokenizeChat(cont)
if((chunkSize + tokens) > maxChunkSize){
if(stringlizedChat === ''){
stringlizedChat += `${cont.role === 'assistant' ? char.type === 'group' ? '' : char.name : db.username}: ${cont.content}\n\n`
if(cont.role !== 'function' && cont.role !== 'system'){
stringlizedChat += `${cont.role === 'assistant' ? char.type === 'group' ? '' : char.name : db.username}: ${cont.content}\n\n`
}
}
lastId = cont.memo
break
@@ -203,13 +290,67 @@ export async function supaMemory(
const tokenz = await tokenize(result + '\n\n')
currentTokens += tokenz
supaMemory += result.replace(/\n+/g,'\n') + '\n\n'
let SupaMemoryList = supaMemory.split('\n\n')
if(SupaMemoryList.length >= 5){
const oldSupaMemory = supaMemory
let modifies:string[] = []
for(let i=0;i<3;i++){
modifies.push(SupaMemoryList.shift())
}
hypaChunks.push(...modifies)
const result = await summarize(supaMemory)
if(typeof(result) !== 'string'){
return result
}
modifies.unshift(result.replace(/\n+/g,'\n'))
supaMemory = modifies.join('\n\n') + '\n\n'
currentTokens -= await tokenize(oldSupaMemory)
currentTokens += await tokenize(supaMemory)
}
}
}
chats.unshift({
role: "system",
content: supaMemory
content: supaMemory,
name: "supaMemory"
})
if(arg.asHyper){
if(hypaResult !== ''){
chats.unshift({
role: "assistant",
content: hypaResult
})
}
if(HypaData[0] && HypaData[0].id === lastId){
HypaData[0].hypa = hypaChunks
HypaData[0].supa = supaMemory
}
else{
HypaData.push({
id: lastId,
hypa: hypaChunks,
supa: supaMemory
})
}
return {
currentTokens: currentTokens,
chats: chats,
memory: JSON.stringify(HypaData, null, 2),
lastId: lastId
}
}
return {
currentTokens: currentTokens,
chats: chats,
@@ -224,3 +365,5 @@ export async function supaMemory(
}
}
/**
 * One hypaMemory entry, as stored in the JSON array that supaMemory returns as `memory`.
 * - id: memo of the chat message the entry is matched against when loading.
 * - supa: summary text that is inserted as the "supaMemory" system message.
 * - hypa: text chunks passed to HypaProcesser.addText for similarity search.
 */
type HypaData = {id:string,supa:string,hypa:string[]}