Merge branch 'main' into local_lore2

Authored by kwaroran, committed via GitHub
2025-03-05 05:38:11 +09:00
23 changed files with 1022 additions and 313 deletions

@@ -37,14 +37,14 @@ export class HypaProcesser{
name: "hypaVector"
})
this.vectors = []
+ const db = getDatabase()
if(model === 'auto'){
- const db = getDatabase()
this.model = db.hypaModel || 'MiniLM'
}
else{
this.model = model
}
- this.customEmbeddingUrl = customEmbeddingUrl
+ this.customEmbeddingUrl = customEmbeddingUrl || db.hypaCustomSettings.url
}
async embedDocuments(texts: string[]): Promise<VectorArray[]> {
@@ -77,12 +77,17 @@ export class HypaProcesser{
}
const {customEmbeddingUrl} = this
const replaceUrl = customEmbeddingUrl.endsWith('/embeddings')?customEmbeddingUrl:appendLastPath(customEmbeddingUrl,'embeddings')
- gf = await globalFetch(replaceUrl.toString(), {
- body:{
- "input": input
- },
- })
+ const db = getDatabase()
+ const fetchArgs = {
+ ...(db.hypaCustomSettings.key ? {headers: {"Authorization": "Bearer " + db.hypaCustomSettings.key}} : {}),
+ body: {
+ "input": input,
+ ...(db.hypaCustomSettings.model ? {"model": db.hypaCustomSettings.model} : {})
+ }
+ };
+ gf = await globalFetch(replaceUrl.toString(), fetchArgs)
}
if(this.model === 'ada' || this.model === 'openai3small' || this.model === 'openai3large'){
const db = getDatabase()
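The two conditional spreads above attach the Authorization header and the model field only when the corresponding hypaCustomSettings value is set. A minimal standalone sketch of the resulting argument object (settings values hypothetical):

const settings = { url: 'https://example.com/v1', key: 'sk-demo', model: 'my-embedding-model' } // stands in for db.hypaCustomSettings
const input = ['hello world']
const fetchArgs = {
    ...(settings.key ? { headers: { 'Authorization': 'Bearer ' + settings.key } } : {}),
    body: {
        'input': input,
        ...(settings.model ? { 'model': settings.model } : {})
    }
}
// => { headers: { Authorization: 'Bearer sk-demo' },
//      body: { input: ['hello world'], model: 'my-embedding-model' } }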

@@ -268,9 +268,7 @@ function getModuleByIds(ids:string[]){
modules.push(module)
}
}
- if(db.moduleIntergration){
- modules = deduplicateModuleById(modules)
- }
+ modules = deduplicateModuleById(modules)
return modules
}

@@ -15,7 +15,6 @@ import { supportsInlayImage } from "./files/inlays";
import { Capacitor } from "@capacitor/core";
import { getFreeOpenRouterModel } from "../model/openrouter";
import { runTransformers } from "./transformers";
- import {createParser} from 'eventsource-parser'
import {Ollama} from 'ollama/dist/browser.mjs'
import { applyChatTemplate } from "./templates/chatTemplate";
import { OobaParams } from "./prompt";
@@ -59,7 +58,8 @@ type requestDataResponse = {
noRetry?: boolean,
special?: {
emotion?: string
- }
+ },
+ failByServerError?: boolean
}|{
type: "streaming",
result: ReadableStream<StreamResponseChunk>,
@@ -92,12 +92,28 @@ interface OaiFunctions {
}
- export type Parameter = 'temperature'|'top_k'|'repetition_penalty'|'min_p'|'top_a'|'top_p'|'frequency_penalty'|'presence_penalty'|'reasoning_effort'
+ export type Parameter = 'temperature'|'top_k'|'repetition_penalty'|'min_p'|'top_a'|'top_p'|'frequency_penalty'|'presence_penalty'|'reasoning_effort'|'thinking_tokens'
export type ModelModeExtended = 'model'|'submodel'|'memory'|'emotion'|'otherAx'|'translate'
type ParameterMap = {
[key in Parameter]?: string;
};
+ function setObjectValue<T>(obj: T, key: string, value: any): T {
+ const splitKey = key.split('.');
+ if(splitKey.length > 1){
+ const firstKey = splitKey.shift()
+ if(!obj[firstKey]){
+ obj[firstKey] = {};
+ }
+ obj[firstKey] = setObjectValue(obj[firstKey], splitKey.join('.'), value);
+ return obj;
+ }
+ obj[key] = value;
+ return obj;
+ }
function applyParameters(data: { [key: string]: any }, parameters: Parameter[], rename: ParameterMap, ModelMode:ModelModeExtended, arg:{
ignoreTopKIfZero?:boolean
} = {}): { [key: string]: any } {
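The new setObjectValue helper lets a parameter rename target a nested key rather than a flat one. A quick illustration, assuming the definition above, using the 'thinking_tokens' -> 'thinking.budget_tokens' rename applied later for Claude:

let data: { [key: string]: any } = { model: 'example-model' } // hypothetical body
data = setObjectValue(data, 'thinking.budget_tokens', 1024)
// => { model: 'example-model', thinking: { budget_tokens: 1024 } }
data = setObjectValue(data, 'temperature', 0.7) // plain keys behave like direct assignment
// => { model: 'example-model', thinking: { budget_tokens: 1024 }, temperature: 0.7 }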
@@ -157,6 +173,10 @@ function applyParameters(data: { [key: string]: any }, parameters: Parameter[],
value = db.seperateParameters[ModelMode].top_p
break
}
+ case 'thinking_tokens':{
+ value = db.seperateParameters[ModelMode].thinking_tokens
+ break
+ }
case 'frequency_penalty':{
value = db.seperateParameters[ModelMode].frequency_penalty === -1000 ? -1000 : (db.seperateParameters[ModelMode].frequency_penalty / 100)
break
@@ -175,7 +195,7 @@ function applyParameters(data: { [key: string]: any }, parameters: Parameter[],
continue
}
- data[rename[parameter] ?? parameter] = value
+ data = setObjectValue(data, rename[parameter] ?? parameter, value)
}
return data
}
@@ -223,13 +243,17 @@ function applyParameters(data: { [key: string]: any }, parameters: Parameter[],
value = db.PresensePenalty === -1000 ? -1000 : (db.PresensePenalty / 100)
break
}
+ case 'thinking_tokens':{
+ value = db.thinkingTokens
+ break
+ }
}
if(value === -1000){
continue
}
- data[rename[parameter] ?? parameter] = value
+ data = setObjectValue(data, rename[parameter] ?? parameter, value)
}
return data
}
@@ -247,7 +271,7 @@ export async function requestChatData(arg:requestDataArgument, model:ModelModeEx
try{
const currentChar = getCurrentCharacter()
- if(currentChar.type !== 'group'){
+ if(currentChar?.type !== 'group'){
const perf = performance.now()
const d = await runTrigger(currentChar, 'request', {
chat: getCurrentChat(),
@@ -305,6 +329,13 @@ export async function requestChatData(arg:requestDataArgument, model:ModelModeEx
if(da.type !== 'fail' || da.noRetry){
return da
}
+ if(da.failByServerError){
+ await sleep(1000)
+ if(db.antiServerOverloads){
+ trys -= 0.5 // reduce trys by 0.5, so that it will retry twice as much
+ }
+ }
trys += 1
if(trys > db.requestRetrys){
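Since each failed attempt adds 1 to trys, subtracting 0.5 first makes a server-error failure cost only half an attempt, roughly doubling the retry budget for overloaded servers. A sketch of the arithmetic (value hypothetical):

const requestRetrys = 3 // hypothetical db.requestRetrys
// normal failure:                        trys += 1        (one attempt per try)
// server error with antiServerOverloads: trys += 1 - 0.5  (half an attempt, so ~2x the retries)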
@@ -1710,17 +1741,6 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
})
}
}
- else if(chat.role === 'assistant' && arg.modelInfo.flags.includes(LLMFlags.geminiThinking)){
- reformatedChat.push({
- role: 'MODEL',
- parts: [chat.thoughts?.length > 0 ? {
- text: chat.thoughts.join('\n\n')
- } : null, {
- text: chat.content
- }]
- })
- }
else if(chat.role === 'assistant' || chat.role === 'user'){
reformatedChat.push({
role: chat.role === 'user' ? 'USER' : 'MODEL',
@@ -1896,6 +1916,101 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
url = `https://generativelanguage.googleapis.com/v1beta/models/${arg.modelInfo.internalID}:generateContent?key=${db.google.accessToken}`
}
+ const fallBackGemini = async (originalError:string):Promise<requestDataResponse> => {
+ if(!db.antiServerOverloads){
+ return {
+ type: 'fail',
+ result: originalError,
+ failByServerError: true
+ }
+ }
+ if(arg?.abortSignal?.aborted){
+ return {
+ type: 'fail',
+ result: originalError,
+ failByServerError: true
+ }
+ }
+ if(arg.modelInfo.format === LLMFormat.VertexAIGemini){
+ return {
+ type: 'fail',
+ result: originalError,
+ failByServerError: true
+ }
+ }
+ try {
+ const OAIMessages:OpenAIChat[] = body.contents.map((v) => {
+ return {
+ role: v.role === 'USER' ? 'user' : 'assistant',
+ content: v.parts.map((v) => {
+ return v.text ?? ''
+ }).join('\n')
+ }
+ })
+ if(body?.systemInstruction?.parts?.[0]?.text){
+ OAIMessages.unshift({
+ role: 'system',
+ content: body.systemInstruction.parts[0].text
+ })
+ }
+ await sleep(2000)
+ const res = await fetch('https://generativelanguage.googleapis.com/v1beta/openai/chat/completions', {
+ body: JSON.stringify({
+ model: arg.modelInfo.internalID,
+ messages: OAIMessages,
+ max_tokens: maxTokens,
+ temperature: body.generation_config?.temperature,
+ top_p: body.generation_config?.topP,
+ presence_penalty: body.generation_config?.presencePenalty,
+ frequency_penalty: body.generation_config?.frequencyPenalty,
+ }),
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ 'Authorization': `Bearer ${db.google.accessToken}`
+ },
+ signal: arg.abortSignal
+ })
+ if(!res.ok){
+ return {
+ type: 'fail',
+ result: originalError,
+ failByServerError: true
+ }
+ }
+ if(arg?.abortSignal?.aborted){
+ return {
+ type: 'fail',
+ result: originalError
+ }
+ }
+ const d = await res.json()
+ if(d?.choices?.[0]?.message?.content){
+ return {
+ type: 'success',
+ result: d.choices[0].message.content
+ }
+ }
+ else{
+ return {
+ type: 'fail',
+ result: originalError,
+ failByServerError: true
+ }
+ }
+ } catch (error) {
+ return {
+ type: 'fail',
+ result: originalError,
+ failByServerError: true
+ }
+ }
+ }
if(arg.modelInfo.format === LLMFormat.GoogleCloud && arg.useStreaming){
headers['Content-Type'] = 'application/json'
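The fallback above retries a RESOURCE_EXHAUSTED Gemini request through the OpenAI-compatible chat/completions endpoint, flattening each Gemini content entry into a single chat message. A minimal sketch of that conversion (sample contents hypothetical):

type GeminiPart = { text?: string }
type GeminiContent = { role: 'USER'|'MODEL', parts: GeminiPart[] }
const contents: GeminiContent[] = [
    { role: 'USER', parts: [{ text: 'Hello' }] },
    { role: 'MODEL', parts: [{ text: 'Part one' }, { text: 'Part two' }] }
]
const OAIMessages = contents.map((v) => ({
    role: v.role === 'USER' ? 'user' : 'assistant',
    content: v.parts.map((p) => p.text ?? '').join('\n')
}))
// => [ { role: 'user', content: 'Hello' },
//      { role: 'assistant', content: 'Part one\nPart two' } ]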
@@ -1907,9 +2022,13 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
})
if(f.status !== 200){
+ const text = await textifyReadableStream(f.body)
+ if(text.includes('RESOURCE_EXHAUSTED')){
+ return fallBackGemini(text)
+ }
return {
type: 'fail',
- result: await textifyReadableStream(f.body)
+ result: text
}
}
@@ -1974,8 +2093,13 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
chatId: arg.chatId,
abortSignal: arg.abortSignal,
})
if(!res.ok){
+ const text = JSON.stringify(res.data)
+ if(text.includes('RESOURCE_EXHAUSTED')){
+ return fallBackGemini(text)
+ }
return {
type: 'fail',
result: `${JSON.stringify(res.data)}`
@@ -2541,14 +2665,23 @@ async function requestClaude(arg:RequestDataArgumentExtended):Promise<requestDat
})
}
+ console.log(arg.modelInfo.parameters)
let body = applyParameters({
model: arg.modelInfo.internalID,
messages: finalChat,
system: systemPrompt.trim(),
max_tokens: maxTokens,
stream: useStreaming ?? false
- }, ['temperature', 'top_k', 'top_p'], {}, arg.mode)
+ }, arg.modelInfo.parameters, {
+ 'thinking_tokens': 'thinking.budget_tokens'
+ }, arg.mode)
+ if(body?.thinking?.budget_tokens === 0){
+ delete body.thinking
+ }
+ else if(body?.thinking?.budget_tokens && body?.thinking?.budget_tokens > 0){
+ body.thinking.type = 'enabled'
+ }
if(systemPrompt === ''){
delete body.system
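With the 'thinking_tokens' -> 'thinking.budget_tokens' rename, a nonzero budget produces a nested thinking block that the code above then marks as enabled; a zero budget removes the block entirely. A sketch of the resulting body (model ID and numbers hypothetical):

const body: { [key: string]: any } = {
    model: 'claude-example',
    max_tokens: 16384,
    thinking: { budget_tokens: 4096 } // produced by setObjectValue via the rename
}
if (body?.thinking?.budget_tokens === 0) {
    delete body.thinking
} else if (body?.thinking?.budget_tokens && body?.thinking?.budget_tokens > 0) {
    body.thinking.type = 'enabled'
}
// => body.thinking === { budget_tokens: 4096, type: 'enabled' }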
@@ -2645,8 +2778,18 @@ async function requestClaude(arg:RequestDataArgumentExtended):Promise<requestDat
"accept": "application/json",
}
+ let betas:string[] = []
if(db.claudeCachingExperimental){
- headers['anthropic-beta'] = 'prompt-caching-2024-07-31'
+ betas.push('prompt-caching-2024-07-31')
}
+ if(body.max_tokens > 8192){
+ betas.push('output-128k-2025-02-19')
+ }
+ if(betas.length > 0){
+ headers['anthropic-beta'] = betas.join(',')
+ }
if(db.usePlainFetch){
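Collecting beta names in an array and joining them replaces the previous single-value assignment, so both betas can be active at once. A sketch of the resulting header (flag values hypothetical):

const headers: { [key: string]: string } = {}
const betas: string[] = []
const cachingExperimental = true // stands in for db.claudeCachingExperimental
const maxTokens = 16384
if (cachingExperimental) { betas.push('prompt-caching-2024-07-31') }
if (maxTokens > 8192) { betas.push('output-128k-2025-02-19') }
if (betas.length > 0) { headers['anthropic-beta'] = betas.join(',') }
// => headers['anthropic-beta'] === 'prompt-caching-2024-07-31,output-128k-2025-02-19'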
@@ -2668,113 +2811,116 @@ async function requestClaude(arg:RequestDataArgumentExtended):Promise<requestDat
result: await textifyReadableStream(res.body)
}
}
- let rerequesting = false
- let breakError = ''
+ let thinking = false
const stream = new ReadableStream<StreamResponseChunk>({
async start(controller){
let text = ''
let reader = res.body.getReader()
+ let parserData = ''
const decoder = new TextDecoder()
- const parser = createParser(async (e) => {
+ const parseEvent = (async (e:string) => {
try {
- if(e.type === 'event'){
- switch(e.event){
- case 'content_block_delta': {
- if(e.data){
- text += JSON.parse(e.data).delta?.text
- controller.enqueue({
- "0": text
- })
- }
- break
+ const parsedData = JSON.parse(e)
+ if(parsedData?.type === 'content_block_delta'){
+ if(parsedData?.delta?.type === 'text' || parsedData.delta?.type === 'text_delta'){
+ if(thinking){
+ text += "</Thoughts>\n\n"
+ thinking = false
+ }
- case 'error': {
- if(e.data){
- const errormsg:string = JSON.parse(e.data).error?.message
- if(errormsg && errormsg.toLocaleLowerCase().includes('overload') && db.antiClaudeOverload){
- console.log('Overload detected, retrying...')
- reader.cancel()
- rerequesting = true
- await sleep(2000)
- body.max_tokens -= await tokenize(text)
- if(body.max_tokens < 0){
- body.max_tokens = 0
- }
- if(body.messages.at(-1)?.role !== 'assistant'){
- body.messages.push({
- role: 'assistant',
- content: [{
- type: 'text',
- text: ''
- }]
- })
- }
- let block = body.messages[body.messages.length-1].content
- if(typeof block === 'string'){
- body.messages[body.messages.length-1].content += text
- }
- else if(block[0].type === 'text'){
- block[0].text += text
- }
- const res = await fetchNative(replacerURL, {
- body: JSON.stringify(body),
- headers: {
- "Content-Type": "application/json",
- "x-api-key": apiKey,
- "anthropic-version": "2023-06-01",
- "accept": "application/json",
- },
- method: "POST",
- chatId: arg.chatId
- })
- if(res.status !== 200){
- breakError = 'Error: ' + await textifyReadableStream(res.body)
- break
- }
- reader = res.body.getReader()
- rerequesting = false
- break
- }
- text += "Error:" + JSON.parse(e.data).error?.message
- if(arg.extractJson && (db.jsonSchemaEnabled || arg.schema)){
- controller.enqueue({
- "0": extractJSON(text, db.jsonSchema)
- })
- }
- else{
- controller.enqueue({
- "0": text
- })
- }
- }
- break
+ text += parsedData.delta?.text ?? ''
+ }
+ if(parsedData?.delta?.type === 'thinking' || parsedData.delta?.type === 'thinking_delta'){
+ if(!thinking){
+ text += "<Thoughts>\n"
+ thinking = true
+ }
+ text += parsedData.delta?.thinking ?? ''
+ }
+ if(parsedData?.delta?.type === 'redacted_thinking'){
+ if(!thinking){
+ text += "<Thoughts>\n"
+ thinking = true
+ }
+ text += '\n{{redacted_thinking}}\n'
+ }
+ }
- } catch (error) {}
+ if(parsedData?.type === 'error'){
+ const errormsg:string = parsedData?.error?.message
+ if(errormsg && errormsg.toLocaleLowerCase().includes('overload') && db.antiServerOverloads){
+ // console.log('Overload detected, retrying...')
+ controller.enqueue({
+ "0": "Overload detected, retrying..."
+ })
+ return 'overload'
+ }
+ text += "Error:" + parsedData?.error?.message
+ }
+ }
+ catch (error) {
+ }
})
+ let breakWhile = false
+ let i = 0;
+ let prevText = ''
while(true){
- if(rerequesting){
- if(breakError){
- controller.enqueue({
- "0": breakError
- })
+ try {
+ if(arg?.abortSignal?.aborted || breakWhile){
break
}
- await sleep(1000)
- continue
- }
- try {
const {done, value} = await reader.read()
if(done){
- if(rerequesting){
- continue
- }
break
}
- parser.feed(decoder.decode(value))
+ parserData += (decoder.decode(value))
+ let parts = parserData.split('\n')
+ for(;i<parts.length-1;i++){
+ prevText = text
+ if(parts?.[i]?.startsWith('data: ')){
+ const d = await parseEvent(parts[i].slice(6))
+ if(d === 'overload'){
+ parserData = ''
+ prevText = ''
+ text = ''
+ reader.cancel()
+ const res = await fetchNative(replacerURL, {
+ body: JSON.stringify(body),
+ headers: headers,
+ method: "POST",
+ chatId: arg.chatId
+ })
+ if(res.status !== 200){
+ controller.enqueue({
+ "0": await textifyReadableStream(res.body)
+ })
+ breakWhile = true
+ break
+ }
+ reader = res.body.getReader()
+ break
+ }
+ }
+ }
+ i--;
+ text = prevText
+ controller.enqueue({
+ "0": text
+ })
} catch (error) {
await sleep(1)
}
@@ -2799,24 +2945,55 @@ async function requestClaude(arg:RequestDataArgumentExtended):Promise<requestDat
})
if(!res.ok){
+ const stringlified = JSON.stringify(res.data)
return {
type: 'fail',
- result: JSON.stringify(res.data)
+ result: stringlified,
+ failByServerError: stringlified?.toLocaleLowerCase()?.includes('overload')
}
}
if(res.data.error){
+ const stringlified = JSON.stringify(res.data.error)
return {
type: 'fail',
- result: JSON.stringify(res.data.error)
+ result: stringlified,
+ failByServerError: stringlified?.toLocaleLowerCase()?.includes('overload')
}
}
- const resText = res?.data?.content?.[0]?.text
- if(!resText){
+ const contents = res?.data?.content
+ if(!contents || contents.length === 0){
return {
type: 'fail',
result: JSON.stringify(res.data)
}
}
+ let resText = ''
+ let thinking = false
+ for(const content of contents){
+ if(content.type === 'text'){
+ if(thinking){
+ resText += "</Thoughts>\n\n"
+ thinking = false
+ }
+ resText += content.text
+ }
+ if(content.type === 'thinking'){
+ if(!thinking){
+ resText += "<Thoughts>\n"
+ thinking = true
+ }
+ resText += content.thinking ?? ''
+ }
+ if(content.type === 'redacted_thinking'){
+ if(!thinking){
+ resText += "<Thoughts>\n"
+ thinking = true
+ }
+ resText += '\n{{redacted_thinking}}\n'
+ }
+ }
if(arg.extractJson && db.jsonSchemaEnabled){
return {
type: 'success',
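For example, a hypothetical non-streaming response mixing thinking and text blocks would be flattened by the loop above as follows:

const contents = [
    { type: 'thinking', thinking: 'Considering the question...' },
    { type: 'text', text: 'Here is the answer.' }
]
// resText becomes:
// "<Thoughts>\nConsidering the question...</Thoughts>\n\nHere is the answer."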
@@ -2829,6 +3006,7 @@ async function requestClaude(arg:RequestDataArgumentExtended):Promise<requestDat
}
}
async function requestHorde(arg:RequestDataArgumentExtended):Promise<requestDataResponse> {
const formated = arg.formated
const db = getDatabase()