Add Harunai Memory

This commit is contained in:
kwaroran
2024-04-23 22:07:44 +09:00
parent 376fa1641b
commit 2abd3bf15a
10 changed files with 198 additions and 81 deletions

View File

@@ -27,7 +27,7 @@
"@smithy/protocol-http": "^3.0.12", "@smithy/protocol-http": "^3.0.12",
"@smithy/signature-v4": "^2.0.19", "@smithy/signature-v4": "^2.0.19",
"@tauri-apps/api": "1.5.3", "@tauri-apps/api": "1.5.3",
"@xenova/transformers": "^2.14.0", "@xenova/transformers": "^2.17.1",
"blueimp-md5": "^2.19.0", "blueimp-md5": "^2.19.0",
"body-parser": "^1.20.2", "body-parser": "^1.20.2",
"buffer": "^6.0.3", "buffer": "^6.0.3",

15
pnpm-lock.yaml generated
View File

@@ -39,8 +39,8 @@ dependencies:
specifier: 1.5.3 specifier: 1.5.3
version: 1.5.3 version: 1.5.3
'@xenova/transformers': '@xenova/transformers':
specifier: ^2.14.0 specifier: ^2.17.1
version: 2.14.0 version: 2.17.1
blueimp-md5: blueimp-md5:
specifier: ^2.19.0 specifier: ^2.19.0
version: 2.19.0 version: 2.19.0
@@ -701,11 +701,6 @@ packages:
dev: true dev: true
optional: true optional: true
/@huggingface/jinja@0.1.2:
resolution: {integrity: sha512-x5mpbfJt1nKmVep5WNP5VjNsjWApWNj8pPYI+uYMkBWH9bWUJmQmHt2lbf0VCoQd54Oq3XuFEh/UyoVh7rPxmg==}
engines: {node: '>=18'}
dev: false
/@huggingface/jinja@0.2.2: /@huggingface/jinja@0.2.2:
resolution: {integrity: sha512-/KPde26khDUIPkTGU82jdtTW9UAuvUTumCAbFs/7giR0SxsvZC4hru51PBvpijH6BVkHcROcvZM/lpy5h1jRRA==} resolution: {integrity: sha512-/KPde26khDUIPkTGU82jdtTW9UAuvUTumCAbFs/7giR0SxsvZC4hru51PBvpijH6BVkHcROcvZM/lpy5h1jRRA==}
engines: {node: '>=18'} engines: {node: '>=18'}
@@ -1704,10 +1699,10 @@ packages:
resolution: {integrity: sha512-ggMz8nOygG7d/stpH40WVaNvBwuyYLnrg5Mbyf6bmsj/8+gb6Ei4ZZ9/4PNpcPNTT8th9Q8sM8wYmWGjMWLX/A==} resolution: {integrity: sha512-ggMz8nOygG7d/stpH40WVaNvBwuyYLnrg5Mbyf6bmsj/8+gb6Ei4ZZ9/4PNpcPNTT8th9Q8sM8wYmWGjMWLX/A==}
dev: true dev: true
/@xenova/transformers@2.14.0: /@xenova/transformers@2.17.1:
resolution: {integrity: sha512-rQ3O7SW5EM64b6XFZGx3XQ2cfiroefxUwU9ShfSpEZyhd082GvwNJJKndxgaukse1hZP1JUDoT0DfjDiq4IZiw==} resolution: {integrity: sha512-zo702tQAFZXhzeD2GCYUNUqeqkoueOdiSbQWa4s0q7ZE4z8WBIwIsMMPGobpgdqjQ2u0Qulo08wuqVEUrBXjkQ==}
dependencies: dependencies:
'@huggingface/jinja': 0.1.2 '@huggingface/jinja': 0.2.2
onnxruntime-web: 1.14.0 onnxruntime-web: 1.14.0
sharp: 0.32.6 sharp: 0.32.6
optionalDependencies: optionalDependencies:

View File

@@ -579,4 +579,5 @@ export const languageEnglish = {
tokenizer: "Tokenizer", tokenizer: "Tokenizer",
chatFormating: "Chat Formating", chatFormating: "Chat Formating",
useInstructPrompt: "Use Instruction Prompt", useInstructPrompt: "Use Instruction Prompt",
hanuraiMemory: "HanuraiMemory",
} }

View File

@@ -207,10 +207,40 @@
</SelectInput> </SelectInput>
</Arcodion> </Arcodion>
<Arcodion name={language.SuperMemory} styled> <Arcodion name={language.SuperMemory}/{language.hanuraiMemory} styled>
<span class="text-textcolor mt-4">{language.type}</span>
<SelectInput value={
$DataBase.supaMemoryType !== 'none' ? 'supaMemory' :
$DataBase.hanuraiEnable ? 'hanuraiMemory' : 'none'
} on:change={(v) => {
//@ts-ignore
const value = v.target.value
if (value === 'supaMemory'){
$DataBase.supaMemoryType = 'distilbart'
$DataBase.hanuraiEnable = false
} else if (value === 'hanuraiMemory'){
$DataBase.supaMemoryType = 'none'
$DataBase.hanuraiEnable = true
} else {
$DataBase.supaMemoryType = 'none'
$DataBase.hanuraiEnable = false
}
}}>
<OptionInput value="none" >None</OptionInput>
<OptionInput value="supaMemory" >{language.SuperMemory}</OptionInput>
<OptionInput value="hanuraiMemory" >{language.hanuraiMemory}</OptionInput>
</SelectInput>
{#if $DataBase.hanuraiEnable}
<span>Chunk Size</span>
<NumberInput size="sm" marginBottom bind:value={$DataBase.hanuraiTokens} min={100} />
<div class="flex">
<Check bind:check={$DataBase.hanuraiSplit} name="Text Spliting"/>
</div>
{:else if $DataBase.supaMemoryType !== 'none'}
<span class="text-textcolor mt-4">{language.SuperMemory} {language.model}</span> <span class="text-textcolor mt-4">{language.SuperMemory} {language.model}</span>
<SelectInput className="mt-2 mb-2" bind:value={$DataBase.supaMemoryType}> <SelectInput className="mt-2 mb-2" bind:value={$DataBase.supaMemoryType}>
<OptionInput value="none" >None</OptionInput>
<OptionInput value="distilbart" >distilbart-cnn-6-6 (Free/Local)</OptionInput> <OptionInput value="distilbart" >distilbart-cnn-6-6 (Free/Local)</OptionInput>
<OptionInput value="instruct35" >OpenAI 3.5 Turbo Instruct</OptionInput> <OptionInput value="instruct35" >OpenAI 3.5 Turbo Instruct</OptionInput>
<OptionInput value="subModel" >{language.submodel}</OptionInput> <OptionInput value="subModel" >{language.submodel}</OptionInput>
@@ -235,4 +265,5 @@
<div class="flex"> <div class="flex">
<Check bind:check={$DataBase.hypaMemory} name={language.enable + ' ' + language.HypaMemory}/> <Check bind:check={$DataBase.hypaMemory} name={language.enable + ' ' + language.HypaMemory}/>
</div> </div>
{/if}
</Arcodion> </Arcodion>

View File

@@ -26,6 +26,7 @@ import { runInlayScreen } from "./inlayScreen";
import { runCharacterJS } from "../plugins/embedscript"; import { runCharacterJS } from "../plugins/embedscript";
import { addRerolls } from "./prereroll"; import { addRerolls } from "./prereroll";
import { runImageEmbedding } from "./transformers"; import { runImageEmbedding } from "./transformers";
import { hanuraiMemory } from "./memory/hanuraiMemory";
export interface OpenAIChat{ export interface OpenAIChat{
role: 'system'|'user'|'assistant'|'function' role: 'system'|'user'|'assistant'|'function'
@@ -647,8 +648,24 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
index++ index++
} }
if(nowChatroom.supaMemory && db.supaMemoryType !== 'none'){
if(nowChatroom.supaMemory && (db.supaMemoryType !== 'none' || db.hanuraiEnable)){
chatProcessStage.set(2) chatProcessStage.set(2)
if(db.hanuraiEnable){
const hn = await hanuraiMemory(chats, {
currentTokens,
maxContextTokens,
tokenizer
})
if(hn === false){
return false
}
chats = hn.chats
currentTokens = hn.tokens
}
else{
const sp = await supaMemory(chats, currentTokens, maxContextTokens, currentChat, nowChatroom, tokenizer, { const sp = await supaMemory(chats, currentTokens, maxContextTokens, currentChat, nowChatroom, tokenizer, {
asHyper: db.hypaMemory asHyper: db.hypaMemory
}) })
@@ -662,7 +679,8 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
db.characters[selectedChar].chats[selectedChat].supaMemoryData = currentChat.supaMemoryData db.characters[selectedChar].chats[selectedChat].supaMemoryData = currentChat.supaMemoryData
console.log(currentChat.supaMemoryData) console.log(currentChat.supaMemoryData)
DataBase.set(db) DataBase.set(db)
currentChat.lastMemory = sp.lastId ?? currentChat.lastMemory currentChat.lastMemory = sp.lastId ?? currentChat.lastMemory;
}
chatProcessStage.set(1) chatProcessStage.set(1)
} }
else{ else{

View File

@@ -0,0 +1,94 @@
import { alertError } from "src/ts/alert";
import type { OpenAIChat } from "..";
import { HypaProcesser } from "./hypamemory";
import { language } from "src/lang";
import type { ChatTokenizer } from "src/ts/tokenizer";
import { get } from "svelte/store";
import { DataBase } from "src/ts/storage/database";
export async function hanuraiMemory(chats:OpenAIChat[],arg:{
currentTokens:number,
maxContextTokens:number,
tokenizer:ChatTokenizer
}){
const db = get(DataBase)
const tokenizer = arg.tokenizer
const processer = new HypaProcesser('nomic')
let addTexts:string[] = []
chats.map((chat) => {
if(!chat?.content?.trim()){
return
}
if(db.hanuraiSplit){
const splited = chat.content.split('\n\n')
for(const split of splited){
if(!split.trim()){
continue
}
addTexts.push(`search_document: ${split.trim()}`)
}
}
addTexts.push(`search_document: ${chat.content?.trim()}`)
})
processer.addText(addTexts)
let scoredResults:{[key:string]:number} = {}
for(let i=1;i<5;i++){
const chat = chats[chats.length-i]
if(!chat?.content){
continue
}
const scoredArray = (await processer.similaritySearchScored('search_query: ' + chat.content)).map((result) => {
return [result[0],result[1]/i] as [string,number]
})
for(const scored of scoredArray){
if(scoredResults[scored[0]]){
scoredResults[scored[0]] += scored[1]
}else{
scoredResults[scored[0]] = scored[1]
}
}
}
const vectorResult = Object.entries(scoredResults).sort((a,b)=>a[1]-b[1])
let tokens = arg.currentTokens + db.hanuraiTokens
while(tokens < arg.maxContextTokens){
const poped = chats.pop()
if(!poped){
alertError(language.errors.toomuchtoken + "\n\nRequired Tokens: " + tokens)
return false
}
tokens -= await tokenizer.tokenizeChat(chats[0])
}
tokens -= db.hanuraiTokens
let resultTexts:string[] = []
for(const vector of vectorResult){
const chat = chats.find((chat) => chat.content === vector[0].substring(14))
if(chat){
continue
}
const tokenized = await tokenizer.tokenizeChat(chat) + 2
tokens += tokenized
if(tokens >= arg.maxContextTokens){
tokens -= tokenized
break
}
resultTexts.push(vector[0].substring(14))
}
console.log(resultTexts)
chats.unshift({
role: "system",
memo: "supaMemory",
content: resultTexts.join('\n\n'),
})
return {
tokens,
chats
}
}

View File

@@ -92,7 +92,7 @@ export class HypaProcesser{
async addText(texts:string[]) { async addText(texts:string[]) {
for(let i=0;i<texts.length;i++){ for(let i=0;i<texts.length;i++){
const itm:memoryVector = await this.forage.getItem(texts[i]) const itm:memoryVector = await this.forage.getItem(texts[i] + '|' + this.model)
if(itm){ if(itm){
itm.alreadySaved = true itm.alreadySaved = true
this.vectors.push(itm) this.vectors.push(itm)
@@ -121,7 +121,7 @@ export class HypaProcesser{
for(let i=0;i<memoryVectors.length;i++){ for(let i=0;i<memoryVectors.length;i++){
const vec = memoryVectors[i] const vec = memoryVectors[i]
if(!vec.alreadySaved){ if(!vec.alreadySaved){
await this.forage.setItem(texts[i], vec) await this.forage.setItem(texts[i] + '|' + this.model, vec)
} }
} }

View File

@@ -1,28 +0,0 @@
import type { OpenAIChat } from "..";
import { HypaProcesser } from "./hypamemory";
export async function termMemory(chats:OpenAIChat[]){
    // Scores every chat message by embedding similarity to the last four
    // messages (recent queries weighted higher) and returns all message
    // texts joined, ordered by ascending combined score.
    const processer = new HypaProcesser('nomic')
    // FIX: addText is async and must finish before searching.
    await processer.addText(chats.map(chat=>chat.content))
    // FIX: scoredResults was declared without an initializer, so the first
    // `scoredResults[...]` access below threw a TypeError at runtime.
    let scoredResults:{[key:string]:number} = {}
    for(let i=1;i<5;i++){
        const chat = chats[chats.length-i]
        if(!chat?.content){
            continue
        }
        // Weight by 1/i so the most recent message contributes the most.
        const scoredArray = (await processer.similaritySearchScored(chat.content)).map((result) => {
            return [result[0],result[1]/i] as [string,number]
        })
        for(const scored of scoredArray){
            if(scoredResults[scored[0]]){
                scoredResults[scored[0]] += scored[1]
            }else{
                scoredResults[scored[0]] = scored[1]
            }
        }
    }
    // NOTE(review): ascending sort puts the lowest combined score first —
    // confirm similaritySearchScored returns a distance, not a similarity.
    const result = Object.entries(scoredResults).sort((a,b)=>a[1]-b[1])
    return result.map(([content,score])=>(content)).join('\n\n')
}

View File

@@ -50,7 +50,8 @@ export const runSummarizer = async (text: string) => {
} }
let extractor:FeatureExtractionPipeline = null let extractor:FeatureExtractionPipeline = null
export const runEmbedding = async (text: string, model:'Xenova/all-MiniLM-L6-v2'|'nomic-ai/nomic-embed-text-v1.5' = 'Xenova/all-MiniLM-L6-v2'):Promise<Float32Array> => { type EmbeddingModel = 'Xenova/all-MiniLM-L6-v2'|'nomic-ai/nomic-embed-text-v1.5'
export const runEmbedding = async (text: string, model:EmbeddingModel = 'Xenova/all-MiniLM-L6-v2'):Promise<Float32Array> => {
await initTransformers() await initTransformers()
if(!extractor){ if(!extractor){
extractor = await pipeline('feature-extraction', model); extractor = await pipeline('feature-extraction', model);

View File

@@ -394,6 +394,9 @@ export function setDatabase(data:Database){
data.instructChatTemplate ??= "chatml" data.instructChatTemplate ??= "chatml"
data.openrouterProvider ??= '' data.openrouterProvider ??= ''
data.useInstructPrompt ??= false data.useInstructPrompt ??= false
data.hanuraiEnable ??= false
data.hanuraiSplit ??= true
data.hanuraiTokens ??= 1000
changeLanguage(data.language) changeLanguage(data.language)
DataBase.set(data) DataBase.set(data)
@@ -642,6 +645,9 @@ export interface Database{
JinjaTemplate:string JinjaTemplate:string
openrouterProvider:string openrouterProvider:string
useInstructPrompt:boolean useInstructPrompt:boolean
hanuraiTokens:number
hanuraiSplit:boolean
hanuraiEnable:boolean
} }
export interface customscript{ export interface customscript{
@@ -847,7 +853,6 @@ export interface botPreset{
top_a?:number top_a?:number
openrouterProvider?:string openrouterProvider?:string
useInstructPrompt?:boolean useInstructPrompt?:boolean
} }