Add: New Option Combine Translation (#405)

# PR Checklist
- [x] Did you check if it works normally in all models? *Ignore this
when it doesn't use models.*
- [x] Did you check if it works normally in all of the web, local, and Node-hosted
versions? If it doesn't, did you block it in those versions?
- [x] Did you add a type def?

# Description
This feature joins the pieces of a sentence back together and translates
them as one unit when a display edit script has split a single sentence
across multiple HTML tags.
Please review it: this option should significantly improve translation
performance without modifying existing scripts `(e.g. Automark)`.
I also made `translateHTML` accept a `chatID` and optimized the existing
code that gets the scripts from `charArg`.
This commit is contained in:
kwaroran
2024-05-06 06:33:23 +09:00
committed by GitHub
7 changed files with 122 additions and 16 deletions

View File

@@ -121,6 +121,7 @@ export const languageEnglish = {
antiClaudeOverload: "If Claude overload happens, RisuAI would try to prevent it by continuing with same prompt, making it less likely to happen. works only for streamed responses. this could not work for non-official api endpoints.",
triggerScript: "Trigger Script is a custom script that runs when a condition is met. it can be used to modify the chat data, run a command, change variable, and etc. the type depends when it is triggered. it can also be run by buttons, which can be used with {{button::Display::TriggerName}}, or HTML buttons with `risu-trigger=\"<TriggerName>\"` attribute.",
autoContinueChat: "If enabled, it will try to continue the chat if it doesn't ends with a punctuation. DONT USE THIS WITH LANGUAGES THAT DOESN'T USE PUNCTUATION.",
combineTranslation: "If enabled, text that is one sentence but separated by HTML tags will be combined together and translated, then Modify Display script will be reapplied to the translated output.\nThis helps the translator to make the correct translation.\nIf the UI becomes weird when you enable this option, please turn off the option and report it.",
},
setup: {
chooseProvider: "Choose AI Provider",
@@ -588,4 +589,5 @@ export const languageEnglish = {
syntax: "Syntax",
run: "Run",
noMessage: "Type something to start the chat.",
combineTranslation : "Combine Translation",
}

View File

@@ -192,6 +192,7 @@ export const languageKorean = {
+ "\n\n- $(name)\n\n - inserts the named group"
+ "\n\nOUT이 **@@**로 시작 시, 문자열을 교체하지 않고, 특수한 효과를 냅니다. 효과는 다음과 같습니다:"
+ "\n\n- @@emo (emotion name)\n\n - 캐릭터가 감정 이미지 모드일시, 감정을 (emotion name)으로 정하고 감정을 더이상 교체하지 않습니다.",
combineTranslation: "활성화된 경우, 한 문장이지만 HTML 태그로 분리된 텍스트를 모두 합쳐서 번역한 후, 번역된 결과에 다시 디스플레이 수정 스크립트를 적용합니다.\n이를 통해 번역기가 올바른 번역을 하도록 도와줍니다.\n이 옵션을 활성화하고 UI가 이상해지면 옵션을 끄고 제보해 주세요.",
},
setup: {
chooseProvider: "AI 제공자를 선택해 주세요",

View File

@@ -103,7 +103,7 @@
if(translateText){
const marked = await ParseMarkdown(data, charArg, mode, chatID)
translating = true
const translated = await translateHTML(marked, false, charArg)
const translated = await translateHTML(marked, false, charArg, chatID)
translating = false
lastParsed = translated
lastCharArg = charArg

View File

@@ -67,6 +67,11 @@
<div class="flex items-center mt-4">
<Check bind:check={$DataBase.useExperimental} name={language.useExperimental}> <Help key="useExperimental"/></Check>
</div>
<div class="flex items-center mt-4">
<Check bind:check={$DataBase.combineTranslation} name={language.combineTranslation}>
<Help key="combineTranslation"/>
</Check>
</div>
<div class="flex items-center mt-4">
<Check bind:check={$DataBase.forceProxyAsOpenAI} name={language.forceProxyAsOpenAI}> <Help key="forceProxyAsOpenAI"/></Check>
</div>

View File

@@ -399,6 +399,7 @@ export function setDatabase(data:Database){
data.textAreaSize ??= 0
data.sideBarSize ??= 0
data.textAreaTextSize ??= 0
data.combineTranslation ??= false
changeLanguage(data.language)
DataBase.set(data)
@@ -653,6 +654,7 @@ export interface Database{
textAreaSize:number
sideBarSize:number
textAreaTextSize:number
combineTranslation:boolean
}
export interface customscript{

View File

@@ -1,6 +1,6 @@
import { get } from "svelte/store"
import { translatorPlugin } from "../plugins/plugins"
import { DataBase, type customscript } from "../storage/database"
import { DataBase, type character, type customscript, type groupChat } from "../storage/database"
import { globalFetch } from "../storage/globalApi"
import { alertError } from "../alert"
import { requestChatData } from "../process/request"
@@ -8,7 +8,8 @@ import { doingChat } from "../process"
import type { simpleCharacterArgument } from "../parser"
import { selectedCharID } from "../stores"
import { getModuleRegexScripts } from "../process/modules"
import { sleep } from "../util"
import { getNodetextWithNewline, sleep } from "../util"
import { processScriptFull } from "../process/scripts"
let cache={
origin: [''],
@@ -210,7 +211,26 @@ export function isExpTranslator(){
return db.translatorType === 'llm' || db.translatorType === 'deepl' || db.translatorType === 'deeplX'
}
export async function translateHTML(html: string, reverse:boolean, charArg:simpleCharacterArgument|string = ''): Promise<string> {
export async function translateHTML(html: string, reverse:boolean, charArg:simpleCharacterArgument|string = '', chatID:number): Promise<string> {
let alwaysExistChar: character | groupChat | simpleCharacterArgument;
if(charArg !== ''){
if(typeof(charArg) === 'string'){
const db = get(DataBase)
const charId = get(selectedCharID)
alwaysExistChar = db.characters[charId]
}
else{
alwaysExistChar=charArg
}
} else {
alwaysExistChar = {
type: 'simple',
customscript: [],
virtualscript: null,
emotionImages: [],
chaId: 'simple'
}
}
let db = get(DataBase)
let DoingChat = get(doingChat)
if(DoingChat){
@@ -279,7 +299,7 @@ export async function translateHTML(html: string, reverse:boolean, charArg:simpl
}
async function translateNodeText(node:Node) {
async function translateNodeText(node:Node, reprocessDisplayScript:boolean = false) {
if(node.textContent.trim().length !== 0){
if(needSuperChunkedTranslate()){
const prm = new Promise<string>((resolve) => {
@@ -292,7 +312,32 @@ export async function translateHTML(html: string, reverse:boolean, charArg:simpl
return
}
node.textContent = await translate(node.textContent || '', reverse);
// node.textContent = await translate(node.textContent || '', reverse);
let translated = await translate(node.textContent || "", reverse);
if (!reprocessDisplayScript) {
node.textContent = translated;
return;
}
const { data: processedTranslated } = await processScriptFull(
alwaysExistChar,
translated,
"editdisplay",
chatID
);
// If the translation is the same, don't replace the node
if (translated == processedTranslated) {
node.textContent = processedTranslated;
return;
}
// Replace the old node with the new one
const newNode = document.createElement(
node.nodeType === Node.TEXT_NODE ? "span" : node.nodeName
);
newNode.innerHTML = processedTranslated;
node.parentNode.replaceChild(newNode, node);
}
}
@@ -317,6 +362,38 @@ export async function translateHTML(html: string, reverse:boolean, charArg:simpl
if(node.nodeName.toLowerCase() === 'script' || node.nodeName.toLowerCase() === 'style'){
return
}
// combineTranslation feature
if (
db.combineTranslation &&
node.nodeName.toLowerCase() === "p" &&
node instanceof HTMLElement
) {
const children = Array.from(node.childNodes);
const blacklist = ["img", "iframe", "script", "style", "div"];
const hasBlacklistChild = children.some((child) =>
blacklist.includes(child.nodeName.toLowerCase())
);
if (!hasBlacklistChild && (node as Element)?.getAttribute('translate') !== 'no'){
const text = getNodetextWithNewline(node);
const sentences = text.split("\n");
if (sentences.length > 1) {
// Multiple sentences seperated by <br> tags
// reconstruct the p tag
node.innerHTML = "";
for (const sentence of sentences) {
const newNode = document.createElement("span");
newNode.textContent = sentence;
node.appendChild(newNode);
await translateNodeText(newNode, true);
node.appendChild(document.createElement("br"));
}
} else {
// Single sentence
await translateNodeText(node, true);
}
return;
}
}
for (const child of Array.from(node.childNodes)) {
if(node.nodeType === Node.ELEMENT_NODE && (node as Element)?.getAttribute('translate') === 'no'){
@@ -342,16 +419,7 @@ export async function translateHTML(html: string, reverse:boolean, charArg:simpl
if(charArg !== ''){
let scripts:customscript[] = []
if(typeof(charArg) === 'string'){
const db = get(DataBase)
const charId = get(selectedCharID)
const char = db.characters[charId]
scripts = (getModuleRegexScripts() ?? []).concat(char?.customscript ?? [])
}
else{
scripts = (getModuleRegexScripts() ?? []).concat(charArg?.customscript ?? [])
}
scripts = (getModuleRegexScripts() ?? []).concat(alwaysExistChar?.customscript ?? [])
for(const script of scripts){
if(script.type === 'edittrans'){
const reg = new RegExp(script.in, script.ableFlag ? script.flag : 'g')

View File

@@ -522,4 +522,32 @@ export function appendLastPath(url, lastPath) {
// Concat the url and lastPath
return url + '/' + lastPath;
}
/**
 * Collects the text of a node's subtree, turning every `<br>` element into a newline.
 *
 * Text nodes contribute their `textContent`, `<br>` elements contribute `'\n'`,
 * and any other element is descended into recursively. Node types other than
 * text and element (e.g. comments) contribute nothing.
 *
 * The `<br>` check is case-insensitive: `nodeName` is only upper-cased for
 * HTML elements in HTML documents, so an exact `'BR'` comparison would miss
 * `<br>` elements coming from XML/XHTML-parsed content.
 *
 * @param node - The node whose descendant text is extracted.
 * @returns The concatenated text, with `'\n'` in place of each `<br>` element.
 *
 * @example
 * const div = document.createElement('div');
 * div.innerHTML = 'Hello<br>World';
 * const text = getNodetextWithNewline(div);
 * console.log(text); // Output: "Hello\nWorld"
 */
export function getNodetextWithNewline(node: Node): string {
    let result = '';
    // Array.from keeps this working for any array-like child list and does not
    // depend on NodeList being iterable in the compilation target.
    for (const child of Array.from(node.childNodes)) {
        if (child.nodeType === Node.TEXT_NODE) {
            // Guard against a null textContent so it is never appended as "null".
            result += child.textContent ?? '';
        } else if (child.nodeType === Node.ELEMENT_NODE) {
            if (child.nodeName.toLowerCase() === 'br') {
                result += '\n';
            } else {
                result += getNodetextWithNewline(child);
            }
        }
    }
    return result;
}