From 5a4b7bbef2cb2061616232800b2267374612dd9d Mon Sep 17 00:00:00 2001 From: Kwaroran Date: Wed, 26 Feb 2025 09:14:32 +0900 Subject: [PATCH] Add anti server overload --- src/lang/en.ts | 1 + src/lib/Setting/Pages/AdvancedSettings.svelte | 9 +- src/ts/process/request.ts | 336 +++++++++++------- src/ts/storage/database.svelte.ts | 5 + 4 files changed, 227 insertions(+), 124 deletions(-) diff --git a/src/lang/en.ts b/src/lang/en.ts index ae7c9ab6..e5a4c8d5 100644 --- a/src/lang/en.ts +++ b/src/lang/en.ts @@ -1058,4 +1058,5 @@ export const languageEnglish = { depth: "Depth", returnCSSError: "Return CSS Error", thinkingTokens: "Thinking Tokens", + antiServerOverload: "Anti-Server Overload", } diff --git a/src/lib/Setting/Pages/AdvancedSettings.svelte b/src/lib/Setting/Pages/AdvancedSettings.svelte index 77d6e87e..8f65b62a 100644 --- a/src/lib/Setting/Pages/AdvancedSettings.svelte +++ b/src/lib/Setting/Pages/AdvancedSettings.svelte @@ -150,11 +150,6 @@ {#if DBState.db.useExperimental} -
- - - -
@@ -200,6 +195,10 @@
+
+ + +
{#if DBState.db.useExperimental}
diff --git a/src/ts/process/request.ts b/src/ts/process/request.ts index 10575bf2..768886c8 100644 --- a/src/ts/process/request.ts +++ b/src/ts/process/request.ts @@ -15,7 +15,6 @@ import { supportsInlayImage } from "./files/inlays"; import { Capacitor } from "@capacitor/core"; import { getFreeOpenRouterModel } from "../model/openrouter"; import { runTransformers } from "./transformers"; -import {createParser} from 'eventsource-parser' import {Ollama} from 'ollama/dist/browser.mjs' import { applyChatTemplate } from "./templates/chatTemplate"; import { OobaParams } from "./prompt"; @@ -59,7 +58,8 @@ type requestDataResponse = { noRetry?: boolean, special?: { emotion?: string - } + }, + failByServerError?: boolean }|{ type: "streaming", result: ReadableStream, @@ -329,6 +329,13 @@ export async function requestChatData(arg:requestDataArgument, model:ModelModeEx if(da.type !== 'fail' || da.noRetry){ return da } + + if(da.failByServerError){ + await sleep(1000) + if(db.antiServerOverloads){ + trys -= 0.5 // reduce trys by 0.5, so that it will retry twice as much + } + } trys += 1 if(trys > db.requestRetrys){ @@ -1909,6 +1916,101 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise url = `https://generativelanguage.googleapis.com/v1beta/models/${arg.modelInfo.internalID}:generateContent?key=${db.google.accessToken}` } + const fallBackGemini = async (originalError:string):Promise => { + if(!db.antiServerOverloads){ + return { + type: 'fail', + result: originalError, + failByServerError: true + } + } + + if(arg?.abortSignal?.aborted){ + return { + type: 'fail', + result: originalError, + failByServerError: true + } + } + if(arg.modelInfo.format === LLMFormat.VertexAIGemini){ + return { + type: 'fail', + result: originalError, + failByServerError: true + } + } + try { + const OAIMessages:OpenAIChat[] = body.contents.map((v) => { + return { + role: v.role === 'USER' ? 'user' : 'assistant', + content: v.parts.map((v) => { + return v.text ?? '' + }).join('\n') + } + }) + if(body?.systemInstruction?.parts?.[0]?.text){ + OAIMessages.unshift({ + role: 'system', + content: body.systemInstruction.parts[0].text + }) + } + await sleep(2000) + const res = await fetch('https://generativelanguage.googleapis.com/v1beta/openai/chat/completions', { + body: JSON.stringify({ + model: arg.modelInfo.internalID, + messages: OAIMessages, + max_tokens: maxTokens, + temperature: body.generation_config?.temperature, + top_p: body.generation_config?.topP, + presence_penalty: body.generation_config?.presencePenalty, + frequency_penalty: body.generation_config?.frequencyPenalty, + }), + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${db.google.accessToken}` + }, + signal: arg.abortSignal + }) + + if(!res.ok){ + return { + type: 'fail', + result: originalError, + failByServerError: true + } + } + + if(arg?.abortSignal?.aborted){ + return { + type: 'fail', + result: originalError + } + } + + const d = await res.json() + + if(d?.choices?.[0]?.message?.content){ + return { + type: 'success', + result: d.choices[0].message.content + } + } + else{ + return { + type: 'fail', + result: originalError, + failByServerError: true + } + } + } catch (error) { + return { + type: 'fail', + result: originalError, + failByServerError: true + } + } + } if(arg.modelInfo.format === LLMFormat.GoogleCloud && arg.useStreaming){ headers['Content-Type'] = 'application/json' @@ -1920,9 +2022,13 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise }) if(f.status !== 200){ + const text = await textifyReadableStream(f.body) + if(text.includes('RESOURCE_EXHAUSTED')){ + return fallBackGemini(text) + } return { type: 'fail', - result: await textifyReadableStream(f.body) + result: text } } @@ -1987,8 +2093,13 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise chatId: arg.chatId, abortSignal: arg.abortSignal, }) + if(!res.ok){ + const text = JSON.stringify(res.data) + if(text.includes('RESOURCE_EXHAUSTED')){ + return fallBackGemini(text) + } return { type: 'fail', result: `${JSON.stringify(res.data)}` @@ -2700,7 +2811,6 @@ async function requestClaude(arg:RequestDataArgumentExtended):Promise { + const parseEvent = (async (e:string) => { try { - if(e.type === 'event'){ - switch(e.event){ - case 'content_block_delta': { - if(e.data){ - const parsedData = JSON.parse(e.data) - if(parsedData.delta?.type === 'text' || parsedData.delta?.type === 'text_delta'){ - if(thinking){ - text += "\n\n" - thinking = false - } - text += parsedData.delta?.text ?? '' - controller.enqueue({ - "0": text - }) - } + const parsedData = JSON.parse(e) - if(parsedData.delta?.type === 'thinking' || parsedData.delta?.type === 'thinking_delta'){ - if(!thinking){ - text += "\n" - thinking = true - } - text += parsedData.delta?.thinking ?? '' - controller.enqueue({ - "0": text - }) - } - - if(parsedData?.delta?.type === 'redacted_thinking'){ - if(!thinking){ - text += "\n" - thinking = true - } - text += '\n{{redacted_thinking}}\n' - controller.enqueue({ - "0": text - }) - } - } - break + if(parsedData?.type === 'content_block_delta'){ + if(parsedData?.delta?.type === 'text' || parsedData.delta?.type === 'text_delta'){ + if(thinking){ + text += "\n\n" + thinking = false } - case 'error': { - if(e.data){ - const errormsg:string = JSON.parse(e.data).error?.message - if(errormsg && errormsg.toLocaleLowerCase().includes('overload') && db.antiClaudeOverload){ - console.log('Overload detected, retrying...') - reader.cancel() - rerequesting = true - await sleep(2000) - body.max_tokens -= await tokenize(text) - if(body.max_tokens < 0){ - body.max_tokens = 0 - } - if(body.messages.at(-1)?.role !== 'assistant'){ - body.messages.push({ - role: 'assistant', - content: [{ - type: 'text', - text: '' - }] - }) - } - let block = body.messages[body.messages.length-1].content - if(typeof block === 'string'){ - body.messages[body.messages.length-1].content += text - } - else if(block[0].type === 'text'){ - block[0].text += text - } - const res = await fetchNative(replacerURL, { - body: JSON.stringify(body), - headers: { - "Content-Type": "application/json", - "x-api-key": apiKey, - "anthropic-version": "2023-06-01", - "accept": "application/json", - }, - method: "POST", - chatId: arg.chatId - }) - if(res.status !== 200){ - breakError = 'Error: ' + await textifyReadableStream(res.body) - break - } - reader = res.body.getReader() - rerequesting = false - break - } - text += "Error:" + JSON.parse(e.data).error?.message - if(arg.extractJson && (db.jsonSchemaEnabled || arg.schema)){ - controller.enqueue({ - "0": extractJSON(text, db.jsonSchema) - }) - } - else{ - controller.enqueue({ - "0": text - }) - } - } - break + text += parsedData.delta?.text ?? '' + controller.enqueue({ + "0": text + }) + } + + if(parsedData?.delta?.type === 'thinking' || parsedData.delta?.type === 'thinking_delta'){ + if(!thinking){ + text += "\n" + thinking = true } + text += parsedData.delta?.thinking ?? '' + controller.enqueue({ + "0": text + }) + } + + if(parsedData?.delta?.type === 'redacted_thinking'){ + if(!thinking){ + text += "\n" + thinking = true + } + text += '\n{{redacted_thinking}}\n' + controller.enqueue({ + "0": text + }) } } - } catch (error) {} + + if(parsedData?.type === 'error'){ + const errormsg:string = parsedData?.error?.message + if(errormsg && errormsg.toLocaleLowerCase().includes('overload') && db.antiServerOverloads){ + // console.log('Overload detected, retrying...') + controller.enqueue({ + "0": "Overload detected, retrying..." + }) + + return 'overload' + } + text += "Error:" + parsedData?.error?.message + if(arg.extractJson && (db.jsonSchemaEnabled || arg.schema)){ + controller.enqueue({ + "0": extractJSON(text, db.jsonSchema) + }) + } + else{ + controller.enqueue({ + "0": text + }) + } + + } + + } + catch (error) { + } + + + }) + let breakWhile = false while(true){ - if(rerequesting){ - if(breakError){ - controller.enqueue({ - "0": breakError - }) + try { + if(arg?.abortSignal?.aborted || breakWhile){ break } - await sleep(1000) - continue - } - try { const {done, value} = await reader.read() if(done){ - if(rerequesting){ - continue - } break } - parser.feed(decoder.decode(value)) + parserData += (decoder.decode(value)) + let parts = parserData.split('\n') + for(let i=0;i { const formated = arg.formated const db = getDatabase() diff --git a/src/ts/storage/database.svelte.ts b/src/ts/storage/database.svelte.ts index 537dbe2b..17c0c174 100644 --- a/src/ts/storage/database.svelte.ts +++ b/src/ts/storage/database.svelte.ts @@ -485,6 +485,10 @@ export function setDatabase(data:Database){ } data.returnCSSError ??= true data.useExperimentalGoogleTranslator ??= false + if(data.antiClaudeOverload){ //migration + data.antiClaudeOverload = false + data.antiServerOverloads = true + } changeLanguage(data.language) setDatabaseLite(data) } @@ -908,6 +912,7 @@ export interface Database{ returnCSSError:boolean useExperimentalGoogleTranslator:boolean thinkingTokens: number + antiServerOverloads: boolean } interface SeparateParameters{