Add anti server overload

Kwaroran
2025-02-26 09:14:32 +09:00
parent 8e00540d6e
commit 5a4b7bbef2
4 changed files with 227 additions and 124 deletions

View File

@@ -1058,4 +1058,5 @@ export const languageEnglish = {
depth: "Depth", depth: "Depth",
returnCSSError: "Return CSS Error", returnCSSError: "Return CSS Error",
thinkingTokens: "Thinking Tokens", thinkingTokens: "Thinking Tokens",
antiServerOverload: "Anti-Server Overload",
} }

View File

@@ -150,11 +150,6 @@
     <Check bind:check={DBState.db.allowAllExtentionFiles} name="Allow all in file select"/>
 </div>
 {#if DBState.db.useExperimental}
-    <div class="flex items-center mt-4">
-        <Check bind:check={DBState.db.antiClaudeOverload} name={language.antiClaudeOverload}>
-            <Help key="experimental"/><Help key="antiClaudeOverload"/>
-        </Check>
-    </div>
     <div class="flex items-center mt-4">
         <Check bind:check={DBState.db.claudeCachingExperimental} name={language.claudeCachingExperimental}>
             <Help key="experimental"/><Help key="claudeCachingExperimental"/>
@@ -200,6 +195,10 @@
 <Check bind:check={DBState.db.returnCSSError} name={language.returnCSSError}>
 </Check>
 </div>
+<div class="flex items-center mt-4">
+    <Check bind:check={DBState.db.antiServerOverloads} name={language.antiServerOverload}>
+    </Check>
+</div>
 {#if DBState.db.useExperimental}
     <div class="flex items-center mt-4">
         <Check bind:check={DBState.db.useExperimentalGoogleTranslator} name={"New Google Translate Experimental"}>

View File

@@ -15,7 +15,6 @@ import { supportsInlayImage } from "./files/inlays";
 import { Capacitor } from "@capacitor/core";
 import { getFreeOpenRouterModel } from "../model/openrouter";
 import { runTransformers } from "./transformers";
-import {createParser} from 'eventsource-parser'
 import {Ollama} from 'ollama/dist/browser.mjs'
 import { applyChatTemplate } from "./templates/chatTemplate";
 import { OobaParams } from "./prompt";
@@ -59,7 +58,8 @@ type requestDataResponse = {
     noRetry?: boolean,
     special?: {
         emotion?: string
-    }
+    },
+    failByServerError?: boolean
 }|{
     type: "streaming",
     result: ReadableStream<StreamResponseChunk>,
@@ -329,6 +329,13 @@ export async function requestChatData(arg:requestDataArgument, model:ModelModeEx
         if(da.type !== 'fail' || da.noRetry){
             return da
         }
+        if(da.failByServerError){
+            await sleep(1000)
+            if(db.antiServerOverloads){
+                trys -= 0.5 // reduce trys by 0.5, so that it will retry twice as much
+            }
+        }
         trys += 1
         if(trys > db.requestRetrys){
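
The fractional decrement above makes a server-overload failure count for only half a normal failure: with antiServerOverloads enabled, each overload retry moves the counter by a net +0.5 instead of +1, so roughly twice as many attempts fit under the same requestRetrys limit. A minimal standalone sketch of that accounting, using a hypothetical attemptsUntilGiveUp helper that is not part of the commit:

    // Net counter movement per failed attempt: a normal failure adds 1,
    // an overload failure with antiServerOverloads enabled adds 1 - 0.5 = 0.5.
    function attemptsUntilGiveUp(requestRetrys: number, overload: boolean, antiServerOverloads: boolean): number {
        let trys = 0
        let attempts = 0
        while (true) {
            attempts += 1
            if (overload && antiServerOverloads) {
                trys -= 0.5
            }
            trys += 1
            if (trys > requestRetrys) {
                return attempts
            }
        }
    }

    // With requestRetrys = 4: 5 attempts for ordinary failures, 9 for overload failures.
    console.log(attemptsUntilGiveUp(4, false, true), attemptsUntilGiveUp(4, true, true))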
@@ -1909,6 +1916,101 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
         url = `https://generativelanguage.googleapis.com/v1beta/models/${arg.modelInfo.internalID}:generateContent?key=${db.google.accessToken}`
     }
+    const fallBackGemini = async (originalError:string):Promise<requestDataResponse> => {
+        if(!db.antiServerOverloads){
+            return {
+                type: 'fail',
+                result: originalError,
+                failByServerError: true
+            }
+        }
+        if(arg?.abortSignal?.aborted){
+            return {
+                type: 'fail',
+                result: originalError,
+                failByServerError: true
+            }
+        }
+        if(arg.modelInfo.format === LLMFormat.VertexAIGemini){
+            return {
+                type: 'fail',
+                result: originalError,
+                failByServerError: true
+            }
+        }
+        try {
+            const OAIMessages:OpenAIChat[] = body.contents.map((v) => {
+                return {
+                    role: v.role === 'USER' ? 'user' : 'assistant',
+                    content: v.parts.map((v) => {
+                        return v.text ?? ''
+                    }).join('\n')
+                }
+            })
+            if(body?.systemInstruction?.parts?.[0]?.text){
+                OAIMessages.unshift({
+                    role: 'system',
+                    content: body.systemInstruction.parts[0].text
+                })
+            }
+            await sleep(2000)
+            const res = await fetch('https://generativelanguage.googleapis.com/v1beta/openai/chat/completions', {
+                body: JSON.stringify({
+                    model: arg.modelInfo.internalID,
+                    messages: OAIMessages,
+                    max_tokens: maxTokens,
+                    temperature: body.generation_config?.temperature,
+                    top_p: body.generation_config?.topP,
+                    presence_penalty: body.generation_config?.presencePenalty,
+                    frequency_penalty: body.generation_config?.frequencyPenalty,
+                }),
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                    'Authorization': `Bearer ${db.google.accessToken}`
+                },
+                signal: arg.abortSignal
+            })
+            if(!res.ok){
+                return {
+                    type: 'fail',
+                    result: originalError,
+                    failByServerError: true
+                }
+            }
+            if(arg?.abortSignal?.aborted){
+                return {
+                    type: 'fail',
+                    result: originalError
+                }
+            }
+            const d = await res.json()
+            if(d?.choices?.[0]?.message?.content){
+                return {
+                    type: 'success',
+                    result: d.choices[0].message.content
+                }
+            }
+            else{
+                return {
+                    type: 'fail',
+                    result: originalError,
+                    failByServerError: true
+                }
+            }
+        } catch (error) {
+            return {
+                type: 'fail',
+                result: originalError,
+                failByServerError: true
+            }
+        }
+    }
     if(arg.modelInfo.format === LLMFormat.GoogleCloud && arg.useStreaming){
         headers['Content-Type'] = 'application/json'
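
The fallBackGemini helper above reshapes the Gemini-native request body into OpenAI-style chat messages before retrying against Gemini's OpenAI-compatible chat/completions endpoint. A minimal standalone sketch of that mapping, using simplified hypothetical types rather than the real body and OpenAIChat shapes from request.ts:

    // Simplified, hypothetical shapes for illustration only.
    type GeminiContent = { role: 'USER' | 'MODEL', parts: { text?: string }[] }
    type OAIMessage = { role: 'system' | 'user' | 'assistant', content: string }

    function toOpenAIMessages(contents: GeminiContent[], systemText?: string): OAIMessage[] {
        // Each Gemini content entry becomes one chat message; its parts are joined with newlines.
        const messages: OAIMessage[] = contents.map((c) => ({
            role: c.role === 'USER' ? 'user' : 'assistant',
            content: c.parts.map((p) => p.text ?? '').join('\n'),
        }))
        // A systemInstruction, if present, is prepended as a system message.
        if (systemText) {
            messages.unshift({ role: 'system', content: systemText })
        }
        return messages
    }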
@@ -1920,9 +2022,13 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
         })
         if(f.status !== 200){
+            const text = await textifyReadableStream(f.body)
+            if(text.includes('RESOURCE_EXHAUSTED')){
+                return fallBackGemini(text)
+            }
             return {
                 type: 'fail',
-                result: await textifyReadableStream(f.body)
+                result: text
             }
         }
@@ -1987,8 +2093,13 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
             chatId: arg.chatId,
             abortSignal: arg.abortSignal,
         })
         if(!res.ok){
+            const text = JSON.stringify(res.data)
+            if(text.includes('RESOURCE_EXHAUSTED')){
+                return fallBackGemini(text)
+            }
             return {
                 type: 'fail',
                 result: `${JSON.stringify(res.data)}`
@@ -2700,7 +2811,6 @@ async function requestClaude(arg:RequestDataArgumentExtended):Promise<requestDat
             result: await textifyReadableStream(res.body)
         }
     }
-    let rerequesting = false
     let breakError = ''
     let thinking = false
@@ -2708,134 +2818,117 @@ async function requestClaude(arg:RequestDataArgumentExtended):Promise<requestDat
         async start(controller){
             let text = ''
             let reader = res.body.getReader()
+            let parserData = ''
             const decoder = new TextDecoder()
-            const parser = createParser(async (e) => {
+            const parseEvent = (async (e:string) => {
                 try {
-                    if(e.type === 'event'){
-                        switch(e.event){
-                            case 'content_block_delta': {
-                                if(e.data){
-                                    const parsedData = JSON.parse(e.data)
-                                    if(parsedData.delta?.type === 'text' || parsedData.delta?.type === 'text_delta'){
-                                        if(thinking){
-                                            text += "</Thoughts>\n\n"
-                                            thinking = false
-                                        }
-                                        text += parsedData.delta?.text ?? ''
-                                        controller.enqueue({
-                                            "0": text
-                                        })
-                                    }
-                                    if(parsedData.delta?.type === 'thinking' || parsedData.delta?.type === 'thinking_delta'){
-                                        if(!thinking){
-                                            text += "<Thoughts>\n"
-                                            thinking = true
-                                        }
-                                        text += parsedData.delta?.thinking ?? ''
-                                        controller.enqueue({
-                                            "0": text
-                                        })
-                                    }
-                                    if(parsedData?.delta?.type === 'redacted_thinking'){
-                                        if(!thinking){
-                                            text += "<Thoughts>\n"
-                                            thinking = true
-                                        }
-                                        text += '\n{{redacted_thinking}}\n'
-                                        controller.enqueue({
-                                            "0": text
-                                        })
-                                    }
-                                }
-                                break
-                            }
-                            case 'error': {
-                                if(e.data){
-                                    const errormsg:string = JSON.parse(e.data).error?.message
-                                    if(errormsg && errormsg.toLocaleLowerCase().includes('overload') && db.antiClaudeOverload){
-                                        console.log('Overload detected, retrying...')
-                                        reader.cancel()
-                                        rerequesting = true
-                                        await sleep(2000)
-                                        body.max_tokens -= await tokenize(text)
-                                        if(body.max_tokens < 0){
-                                            body.max_tokens = 0
-                                        }
-                                        if(body.messages.at(-1)?.role !== 'assistant'){
-                                            body.messages.push({
-                                                role: 'assistant',
-                                                content: [{
-                                                    type: 'text',
-                                                    text: ''
-                                                }]
-                                            })
-                                        }
-                                        let block = body.messages[body.messages.length-1].content
-                                        if(typeof block === 'string'){
-                                            body.messages[body.messages.length-1].content += text
-                                        }
-                                        else if(block[0].type === 'text'){
-                                            block[0].text += text
-                                        }
-                                        const res = await fetchNative(replacerURL, {
-                                            body: JSON.stringify(body),
-                                            headers: {
-                                                "Content-Type": "application/json",
-                                                "x-api-key": apiKey,
-                                                "anthropic-version": "2023-06-01",
-                                                "accept": "application/json",
-                                            },
-                                            method: "POST",
-                                            chatId: arg.chatId
-                                        })
-                                        if(res.status !== 200){
-                                            breakError = 'Error: ' + await textifyReadableStream(res.body)
-                                            break
-                                        }
-                                        reader = res.body.getReader()
-                                        rerequesting = false
-                                        break
-                                    }
-                                    text += "Error:" + JSON.parse(e.data).error?.message
-                                    if(arg.extractJson && (db.jsonSchemaEnabled || arg.schema)){
-                                        controller.enqueue({
-                                            "0": extractJSON(text, db.jsonSchema)
-                                        })
-                                    }
-                                    else{
-                                        controller.enqueue({
-                                            "0": text
-                                        })
-                                    }
-                                }
-                                break
-                            }
-                        }
-                    }
-                } catch (error) {}
+                    const parsedData = JSON.parse(e)
+                    if(parsedData?.type === 'content_block_delta'){
+                        if(parsedData?.delta?.type === 'text' || parsedData.delta?.type === 'text_delta'){
+                            if(thinking){
+                                text += "</Thoughts>\n\n"
+                                thinking = false
+                            }
+                            text += parsedData.delta?.text ?? ''
+                            controller.enqueue({
+                                "0": text
+                            })
+                        }
+                        if(parsedData?.delta?.type === 'thinking' || parsedData.delta?.type === 'thinking_delta'){
+                            if(!thinking){
+                                text += "<Thoughts>\n"
+                                thinking = true
+                            }
+                            text += parsedData.delta?.thinking ?? ''
+                            controller.enqueue({
+                                "0": text
+                            })
+                        }
+                        if(parsedData?.delta?.type === 'redacted_thinking'){
+                            if(!thinking){
+                                text += "<Thoughts>\n"
+                                thinking = true
+                            }
+                            text += '\n{{redacted_thinking}}\n'
+                            controller.enqueue({
+                                "0": text
+                            })
+                        }
+                    }
+                    if(parsedData?.type === 'error'){
+                        const errormsg:string = parsedData?.error?.message
+                        if(errormsg && errormsg.toLocaleLowerCase().includes('overload') && db.antiServerOverloads){
+                            // console.log('Overload detected, retrying...')
+                            controller.enqueue({
+                                "0": "Overload detected, retrying..."
+                            })
+                            return 'overload'
+                        }
+                        text += "Error:" + parsedData?.error?.message
+                        if(arg.extractJson && (db.jsonSchemaEnabled || arg.schema)){
+                            controller.enqueue({
+                                "0": extractJSON(text, db.jsonSchema)
+                            })
+                        }
+                        else{
+                            controller.enqueue({
+                                "0": text
+                            })
+                        }
+                    }
+                }
+                catch (error) {
+                }
             })
+            let breakWhile = false
             while(true){
-                if(rerequesting){
-                    if(breakError){
-                        controller.enqueue({
-                            "0": breakError
-                        })
-                        break
-                    }
-                    await sleep(1000)
-                    continue
-                }
                 try {
+                    if(arg?.abortSignal?.aborted || breakWhile){
+                        break
+                    }
                     const {done, value} = await reader.read()
                     if(done){
-                        if(rerequesting){
-                            continue
-                        }
                         break
                     }
-                    parser.feed(decoder.decode(value))
+                    parserData += (decoder.decode(value))
+                    let parts = parserData.split('\n')
+                    for(let i=0;i<parts.length-1;i++){
+                        if(parts[i].startsWith('data: ')){
+                            const d = await parseEvent(parts[i].slice(6))
+                            if(d === 'overload'){
+                                parserData = ''
+                                reader.cancel()
+                                const res = await fetchNative(replacerURL, {
+                                    body: JSON.stringify(body),
+                                    headers: headers,
+                                    method: "POST",
+                                    chatId: arg.chatId
+                                })
+                                if(res.status !== 200){
+                                    controller.enqueue({
+                                        "0": await textifyReadableStream(res.body)
+                                    })
+                                    breakWhile = true
+                                    break
+                                }
+                                reader = res.body.getReader()
+                                break
+                            }
+                        }
+                    }
                 } catch (error) {
                     await sleep(1)
                 }
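
The rewrite above drops eventsource-parser and parses the Anthropic SSE stream by hand: decoded chunks accumulate in parserData, the buffer is split on newlines, and each complete `data: ` line is passed to parseEvent, which lets the read loop swap in a fresh reader when an overload event arrives. A minimal standalone sketch of that line-buffering idea, with a hypothetical SSELineBuffer class that, unlike the hunk above, trims the buffer as lines are consumed:

    // Feed raw network chunks in; get back complete `data: ` payloads.
    // A line is only emitted once a trailing '\n' proves it is complete.
    class SSELineBuffer {
        private buffer = ''

        push(chunk: string): string[] {
            this.buffer += chunk
            const parts = this.buffer.split('\n')
            // The last element is '' (chunk ended on a newline) or a partial line; keep it for later.
            this.buffer = parts.pop() ?? ''
            return parts
                .filter((line) => line.startsWith('data: '))
                .map((line) => line.slice(6))
        }
    }

    // Usage: payloads survive event boundaries that fall between chunks.
    const buf = new SSELineBuffer()
    console.log(buf.push('data: {"type":"content_'))  // []
    console.log(buf.push('block_delta"}\n'))          // [ '{"type":"content_block_delta"}' ]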
@@ -2860,15 +2953,19 @@ async function requestClaude(arg:RequestDataArgumentExtended):Promise<requestDat
     })
     if(!res.ok){
+        const stringlified = JSON.stringify(res.data)
         return {
             type: 'fail',
-            result: JSON.stringify(res.data)
+            result: stringlified,
+            failByServerError: stringlified?.toLocaleLowerCase()?.includes('overload')
         }
     }
     if(res.data.error){
+        const stringlified = JSON.stringify(res.data.error)
         return {
             type: 'fail',
-            result: JSON.stringify(res.data.error)
+            result: stringlified,
+            failByServerError: stringlified?.toLocaleLowerCase()?.includes('overload')
         }
     }
     const contents = res?.data?.content
@@ -2917,6 +3014,7 @@ async function requestClaude(arg:RequestDataArgumentExtended):Promise<requestDat
     }
 }
 async function requestHorde(arg:RequestDataArgumentExtended):Promise<requestDataResponse> {
     const formated = arg.formated
     const db = getDatabase()

View File

@@ -485,6 +485,10 @@ export function setDatabase(data:Database){
     }
     data.returnCSSError ??= true
     data.useExperimentalGoogleTranslator ??= false
+    if(data.antiClaudeOverload){ //migration
+        data.antiClaudeOverload = false
+        data.antiServerOverloads = true
+    }
     changeLanguage(data.language)
     setDatabaseLite(data)
 }
@@ -908,6 +912,7 @@ export interface Database{
     returnCSSError:boolean
     useExperimentalGoogleTranslator:boolean
     thinkingTokens: number
+    antiServerOverloads: boolean
 }
 interface SeparateParameters{