[feat] new tokenizing
This commit is contained in:
@@ -2,7 +2,7 @@ import type { OpenAIChat } from ".";
|
|||||||
import type { character } from "../storage/database";
|
import type { character } from "../storage/database";
|
||||||
import { replacePlaceholders } from "../util";
|
import { replacePlaceholders } from "../util";
|
||||||
|
|
||||||
export function exampleMessage(char:character):OpenAIChat[]{
|
export function exampleMessage(char:character, userName:string):OpenAIChat[]{
|
||||||
if(char.exampleMessage === ''){
|
if(char.exampleMessage === ''){
|
||||||
return []
|
return []
|
||||||
}
|
}
|
||||||
@@ -34,14 +34,16 @@ export function exampleMessage(char:character):OpenAIChat[]{
|
|||||||
add()
|
add()
|
||||||
currentMessage = {
|
currentMessage = {
|
||||||
role: "assistant",
|
role: "assistant",
|
||||||
content: trimed.split(':', 2)[1]
|
content: trimed.split(':', 2)[1],
|
||||||
|
name: 'example_' + char.name
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if(lowered.startsWith('{{user}}:') || lowered.startsWith('<user>:')){
|
else if(lowered.startsWith('{{user}}:') || lowered.startsWith('<user>:')){
|
||||||
add()
|
add()
|
||||||
currentMessage = {
|
currentMessage = {
|
||||||
role: "user",
|
role: "user",
|
||||||
content: trimed.split(':', 2)[1]
|
content: trimed.split(':', 2)[1],
|
||||||
|
name: 'example_' + userName
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import { get, writable } from "svelte/store";
|
import { get, writable } from "svelte/store";
|
||||||
import { DataBase, setDatabase, type character } from "../storage/database";
|
import { DataBase, setDatabase, type character } from "../storage/database";
|
||||||
import { CharEmotion, selectedCharID } from "../stores";
|
import { CharEmotion, selectedCharID } from "../stores";
|
||||||
import { tokenize, tokenizeNum } from "../tokenizer";
|
import { ChatTokenizer, tokenizeNum } from "../tokenizer";
|
||||||
import { language } from "../../lang";
|
import { language } from "../../lang";
|
||||||
import { alertError } from "../alert";
|
import { alertError } from "../alert";
|
||||||
import { loadLoreBookPrompt } from "./lorebook";
|
import { loadLoreBookPrompt } from "./lorebook";
|
||||||
@@ -15,7 +15,6 @@ import { supaMemory } from "./supaMemory";
|
|||||||
import { v4 } from "uuid";
|
import { v4 } from "uuid";
|
||||||
import { cloneDeep } from "lodash";
|
import { cloneDeep } from "lodash";
|
||||||
import { groupOrder } from "./group";
|
import { groupOrder } from "./group";
|
||||||
import { getNameMaxTokens } from "./stringlize";
|
|
||||||
|
|
||||||
export interface OpenAIChat{
|
export interface OpenAIChat{
|
||||||
role: 'system'|'user'|'assistant'
|
role: 'system'|'user'|'assistant'
|
||||||
@@ -69,7 +68,6 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
|
|||||||
if(nowChatroom.type === 'group'){
|
if(nowChatroom.type === 'group'){
|
||||||
if(chatProcessIndex === -1){
|
if(chatProcessIndex === -1){
|
||||||
const charNames =nowChatroom.characters.map((v) => findCharacterbyIdwithCache(v).name)
|
const charNames =nowChatroom.characters.map((v) => findCharacterbyIdwithCache(v).name)
|
||||||
caculatedChatTokens += await getNameMaxTokens([...charNames, db.username])
|
|
||||||
|
|
||||||
const messages = nowChatroom.chats[nowChatroom.chatPage].message
|
const messages = nowChatroom.chats[nowChatroom.chatPage].message
|
||||||
const lastMessage = messages[messages.length-1]
|
const lastMessage = messages[messages.length-1]
|
||||||
@@ -110,14 +108,10 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
|
|||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
currentChar = nowChatroom
|
currentChar = nowChatroom
|
||||||
if(!db.aiModel.startsWith('gpt')){
|
|
||||||
caculatedChatTokens += await getNameMaxTokens([currentChar.name, db.username])
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let chatAdditonalTokens = arg.chatAdditonalTokens ?? caculatedChatTokens
|
let chatAdditonalTokens = arg.chatAdditonalTokens ?? caculatedChatTokens
|
||||||
|
const tokenizer = new ChatTokenizer(chatAdditonalTokens, db.aiModel.startsWith('gpt') ? 'noName' : 'name')
|
||||||
let selectedChat = nowChatroom.chatPage
|
let selectedChat = nowChatroom.chatPage
|
||||||
let currentChat = nowChatroom.chats[selectedChat]
|
let currentChat = nowChatroom.chats[selectedChat]
|
||||||
let maxContextTokens = db.maxContext
|
let maxContextTokens = db.maxContext
|
||||||
@@ -205,17 +199,17 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
|
|||||||
})
|
})
|
||||||
|
|
||||||
//await tokenize current
|
//await tokenize current
|
||||||
let currentTokens = (await tokenize(Object.keys(unformated).map((key) => {
|
let currentTokens = 0
|
||||||
return (unformated[key] as OpenAIChat[]).map((d) => {
|
|
||||||
return d.content
|
for(const key in unformated){
|
||||||
}).join('\n\n')
|
currentTokens += await tokenizer.tokenizeChat(unformated[key])
|
||||||
}).join('\n\n')) + db.maxResponse) + 130
|
}
|
||||||
|
|
||||||
|
|
||||||
const examples = exampleMessage(currentChar)
|
const examples = exampleMessage(currentChar, db.username)
|
||||||
|
|
||||||
for(const example of examples){
|
for(const example of examples){
|
||||||
currentTokens += await tokenize(example.content) + chatAdditonalTokens
|
currentTokens += await tokenizer.tokenizeChat(example)
|
||||||
}
|
}
|
||||||
|
|
||||||
let chats:OpenAIChat[] = examples
|
let chats:OpenAIChat[] = examples
|
||||||
@@ -230,15 +224,14 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
|
|||||||
if(nowChatroom.type !== 'group'){
|
if(nowChatroom.type !== 'group'){
|
||||||
const firstMsg = nowChatroom.firstMsgIndex === -1 ? nowChatroom.firstMessage : nowChatroom.alternateGreetings[nowChatroom.firstMsgIndex]
|
const firstMsg = nowChatroom.firstMsgIndex === -1 ? nowChatroom.firstMessage : nowChatroom.alternateGreetings[nowChatroom.firstMsgIndex]
|
||||||
|
|
||||||
chats.push({
|
const chat:OpenAIChat = {
|
||||||
role: 'assistant',
|
role: 'assistant',
|
||||||
content: processScript(currentChar,
|
content: processScript(currentChar,
|
||||||
replacePlaceholders(firstMsg, currentChar.name),
|
replacePlaceholders(firstMsg, currentChar.name),
|
||||||
'editprocess')
|
'editprocess')
|
||||||
})
|
}
|
||||||
currentTokens += await tokenize(processScript(currentChar,
|
chats.push(chat)
|
||||||
replacePlaceholders(firstMsg, currentChar.name),
|
currentTokens += await tokenizer.tokenizeChat(chat)
|
||||||
'editprocess'))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const ms = currentChat.message
|
const ms = currentChat.message
|
||||||
@@ -259,17 +252,18 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
|
|||||||
if(!msg.chatId){
|
if(!msg.chatId){
|
||||||
msg.chatId = v4()
|
msg.chatId = v4()
|
||||||
}
|
}
|
||||||
chats.push({
|
const chat:OpenAIChat = {
|
||||||
role: msg.role === 'user' ? 'user' : 'assistant',
|
role: msg.role === 'user' ? 'user' : 'assistant',
|
||||||
content: formedChat,
|
content: formedChat,
|
||||||
memo: msg.chatId,
|
memo: msg.chatId,
|
||||||
name: name
|
name: name
|
||||||
})
|
}
|
||||||
currentTokens += (await tokenize(formedChat) + chatAdditonalTokens)
|
chats.push(chat)
|
||||||
|
currentTokens += await tokenizer.tokenizeChat(chat)
|
||||||
}
|
}
|
||||||
|
|
||||||
if(nowChatroom.supaMemory && db.supaMemoryType !== 'none'){
|
if(nowChatroom.supaMemory && db.supaMemoryType !== 'none'){
|
||||||
const sp = await supaMemory(chats, currentTokens, maxContextTokens, currentChat, nowChatroom, chatAdditonalTokens)
|
const sp = await supaMemory(chats, currentTokens, maxContextTokens, currentChat, nowChatroom, tokenizer)
|
||||||
if(sp.error){
|
if(sp.error){
|
||||||
alertError(sp.error)
|
alertError(sp.error)
|
||||||
return false
|
return false
|
||||||
@@ -287,7 +281,7 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
currentTokens -= (await tokenize(chats[0].content) + chatAdditonalTokens)
|
currentTokens -= await tokenizer.tokenizeChat(chats[0])
|
||||||
chats.splice(0, 1)
|
chats.splice(0, 1)
|
||||||
}
|
}
|
||||||
currentChat.lastMemory = chats[0].memo
|
currentChat.lastMemory = chats[0].memo
|
||||||
|
|||||||
@@ -53,15 +53,4 @@ export function unstringlizeChat(text:string, formated:OpenAIChat[], char:string
|
|||||||
}
|
}
|
||||||
|
|
||||||
return text
|
return text
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Finds the largest token cost among the given speaker names.
 *
 * Each name is measured as the token count of `name + ': '` plus one extra
 * token. Names are tokenized one after another, in order.
 *
 * @param names speaker names to measure
 * @returns the highest per-name cost, or 0 when `names` is empty
 */
export async function getNameMaxTokens(names:string[]){
    let largest = 0
    for(let idx = 0; idx < names.length; idx++){
        const cost = await tokenize(names[idx] + ': ') + 1
        // Keep the running maximum; ties leave the current value untouched.
        if(cost > largest){
            largest = cost
        }
    }
    return largest
}
|
}
|
||||||
@@ -1,8 +1,7 @@
|
|||||||
import { get } from "svelte/store";
|
import { get } from "svelte/store";
|
||||||
import type { OpenAIChat } from ".";
|
import type { OpenAIChat } from ".";
|
||||||
import { DataBase, type Chat, type character, type groupChat } from "../storage/database";
|
import { DataBase, type Chat, type character, type groupChat } from "../storage/database";
|
||||||
import { tokenize } from "../tokenizer";
|
import { tokenize, type ChatTokenizer } from "../tokenizer";
|
||||||
import { findCharacterbyId } from "../util";
|
|
||||||
import { requestChatData } from "./request";
|
import { requestChatData } from "./request";
|
||||||
|
|
||||||
export async function supaMemory(
|
export async function supaMemory(
|
||||||
@@ -11,7 +10,7 @@ export async function supaMemory(
|
|||||||
maxContextTokens:number,
|
maxContextTokens:number,
|
||||||
room:Chat,
|
room:Chat,
|
||||||
char:character|groupChat,
|
char:character|groupChat,
|
||||||
chatAdditonalTokens:number
|
tokenizer:ChatTokenizer
|
||||||
): Promise<{ currentTokens: number; chats: OpenAIChat[]; error?:string; memory?:string;lastId?:string}>{
|
): Promise<{ currentTokens: number; chats: OpenAIChat[]; error?:string; memory?:string;lastId?:string}>{
|
||||||
const db = get(DataBase)
|
const db = get(DataBase)
|
||||||
|
|
||||||
@@ -27,7 +26,7 @@ export async function supaMemory(
|
|||||||
}
|
}
|
||||||
if(coIndex !== -1){
|
if(coIndex !== -1){
|
||||||
for(let i=0;i<coIndex;i++){
|
for(let i=0;i<coIndex;i++){
|
||||||
currentTokens -= (await tokenize(chats[0].content) + chatAdditonalTokens)
|
currentTokens -= await tokenizer.tokenizeChat(chats[0])
|
||||||
chats.splice(0, 1)
|
chats.splice(0, 1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -53,13 +52,13 @@ export async function supaMemory(
|
|||||||
lastId = id
|
lastId = id
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
currentTokens -= (await tokenize(chats[0].content) + chatAdditonalTokens)
|
currentTokens -= await tokenizer.tokenizeChat(chats[0])
|
||||||
chats.splice(0, 1)
|
chats.splice(0, 1)
|
||||||
i += 1
|
i += 1
|
||||||
}
|
}
|
||||||
|
|
||||||
supaMemory = data
|
supaMemory = data
|
||||||
currentTokens += await tokenize(supaMemory) + chatAdditonalTokens
|
currentTokens += await tokenize(supaMemory)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -179,7 +178,7 @@ export async function supaMemory(
|
|||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
const tokens = await tokenize(cont.content) + chatAdditonalTokens
|
const tokens = await tokenizer.tokenizeChat(cont)
|
||||||
if((chunkSize + tokens) > maxChunkSize){
|
if((chunkSize + tokens) > maxChunkSize){
|
||||||
if(stringlizedChat === ''){
|
if(stringlizedChat === ''){
|
||||||
stringlizedChat += `${cont.role === 'assistant' ? char.type === 'group' ? '' : char.name : db.username}: ${cont.content}\n\n`
|
stringlizedChat += `${cont.role === 'assistant' ? char.type === 'group' ? '' : char.name : db.username}: ${cont.content}\n\n`
|
||||||
@@ -201,7 +200,7 @@ export async function supaMemory(
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
const tokenz = await tokenize(result + '\n\n') + chatAdditonalTokens
|
const tokenz = await tokenize(result + '\n\n')
|
||||||
currentTokens += tokenz
|
currentTokens += tokenz
|
||||||
supaMemory += result.replace(/\n+/g,'\n') + '\n\n'
|
supaMemory += result.replace(/\n+/g,'\n') + '\n\n'
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import type { Tiktoken } from "@dqbd/tiktoken";
|
|||||||
import { DataBase, type character } from "./storage/database";
|
import { DataBase, type character } from "./storage/database";
|
||||||
import { get } from "svelte/store";
|
import { get } from "svelte/store";
|
||||||
import { tokenizeTransformers } from "./transformers/transformer";
|
import { tokenizeTransformers } from "./transformers/transformer";
|
||||||
|
import type { OpenAIChat } from "./process";
|
||||||
|
|
||||||
async function encode(data:string):Promise<(number[]|Uint32Array)>{
|
async function encode(data:string):Promise<(number[]|Uint32Array)>{
|
||||||
let db = get(DataBase)
|
let db = get(DataBase)
|
||||||
@@ -37,6 +38,25 @@ export async function tokenize(data:string) {
|
|||||||
return encoded.length
|
return encoded.length
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Counts how many tokens a single chat message contributes to a prompt.
 *
 * A message costs the tokens of its `content`, plus a fixed per-message
 * overhead, plus (in 'name' mode only) the tokens of its `name` field.
 */
export class ChatTokenizer {

    private chatAdditonalTokens:number
    private useName:'name'|'noName'

    /**
     * @param chatAdditonalTokens fixed number of tokens added to every message
     * @param useName 'name' to also count the tokens of a message's `name`,
     *                'noName' to leave the name out of the count
     */
    constructor(chatAdditonalTokens:number, useName:'name'|'noName'){
        this.chatAdditonalTokens = chatAdditonalTokens
        this.useName = useName
    }

    /**
     * Computes the token cost of one chat message.
     *
     * @param data the chat message to measure
     * @returns content tokens + per-message overhead, plus name tokens when
     *          this tokenizer is in 'name' mode and the message has a name
     */
    async tokenizeChat(data:OpenAIChat): Promise<number> {
        // Accumulate step by step instead of one long expression: `+` binds
        // tighter than `===` and `?:`, so an unparenthesized
        // `content + mode === 'name' ? name : 0 + overhead` compares the sum
        // against 'name' and drops the content tokens from the result.
        let encoded = (await encode(data.content)).length + this.chatAdditonalTokens

        // `name` is optional on a chat message; only encode it when present.
        if(this.useName === 'name' && data.name){
            encoded += (await encode(data.name)).length
        }
        return encoded
    }

}
|
||||||
|
|
||||||
export async function tokenizeNum(data:string) {
|
export async function tokenizeNum(data:string) {
|
||||||
const encoded = await encode(data)
|
const encoded = await encode(data)
|
||||||
return encoded
|
return encoded
|
||||||
|
|||||||
Reference in New Issue
Block a user