[feat] romanizer
This commit is contained in:
@@ -96,6 +96,7 @@ export const languageEnglish = {
|
||||
+ "\n - `{{bg::<asset name>}}`: inject the background as asset",
|
||||
additionalText: "The text that would be added to Character Description only when ai thinks its needed, so you can put long texts here. seperate with double newlines.",
|
||||
charjs: "A javascript code that would run with character. for example, you can check `https://github.com/kwaroran/RisuAI/blob/main/src/etc/example-char.js`",
|
||||
romanizer: "Romanizer is a plugin that converts non-roman characters to roman characters to reduce tokens when using non-roman characters while requesting data.",
|
||||
},
|
||||
setup: {
|
||||
chooseProvider: "Choose AI Provider",
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
import NumberInput from "src/lib/UI/GUI/NumberInput.svelte";
|
||||
import SelectInput from "src/lib/UI/GUI/SelectInput.svelte";
|
||||
import OptionInput from "src/lib/UI/GUI/OptionInput.svelte";
|
||||
import Help from "src/lib/Others/Help.svelte";
|
||||
|
||||
</script>
|
||||
<h2 class="mb-2 text-2xl font-bold mt-2">{language.plugin}</h2>
|
||||
@@ -23,6 +24,12 @@
|
||||
<div class="flex items-center mt-2">
|
||||
<Check bind:check={$DataBase.officialplugins.automark} name={language.able}/>
|
||||
</div>
|
||||
<div class="flex">
|
||||
<span class="font-bold flex-grow">Romanizer <Help key="romanizer" /> <span class="text-green-500 italic">(Official Plugin)</span></span>
|
||||
</div>
|
||||
<div class="flex items-center mt-2">
|
||||
<Check bind:check={$DataBase.officialplugins.romanizer} name={language.able}/>
|
||||
</div>
|
||||
{#each $DataBase.plugins as plugin, i}
|
||||
<div class="border-borderc mt-2 mb-2 w-full border-solid border-b-1 seperator"></div>
|
||||
<div class="flex">
|
||||
|
||||
63
src/ts/plugins/romanizer.ts
Normal file
63
src/ts/plugins/romanizer.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
/**
 * Romanizes Hangul, Cyrillic and Hebrew characters in each input string.
 *
 * Characters are handled one code point at a time:
 * - precomposed Hangul syllables are decomposed (NFD) into conjoining jamo and
 *   each jamo is mapped through `koreanDict`;
 * - standalone Hangul compatibility jamo are mapped through their
 *   compatibility decomposition (NFKD) when a mapping exists;
 * - lowercase Cyrillic and Hebrew letters are mapped through their dictionaries;
 * - every other character (ASCII letters, digits, whitespace, punctuation, ...)
 *   is copied to the output unchanged so the text stays readable.
 *
 * @param texts strings to romanize; the array itself is not modified.
 * @returns `result` — the romanized strings, in the same order and of the same
 *          count as `texts`; `mostUsed` — which of `'korean'`, `'cyrillic'`,
 *          `'hybrew'` or `'roman'` was counted most often across all inputs
 *          (on a tie the later key wins, so input with no counted letters
 *          yields `'roman'`).
 */
export function romanizer(texts:string[]): {result: string[], mostUsed: string} {
    // NOTE: the `jong` keys made of a final consonant followed by an initial
    // consonant are kept as provided, but this function only looks up single jamo.
    const koreanDict: {cho: Record<string, string>, jung: Record<string, string>, jong: Record<string, string>} = {
        "cho":{"ᄀ":"g","ᄁ":"kk","ᄂ":"n","ᄃ":"d","ᄄ":"tt","ᄅ":"r","ᄆ":"m","ᄇ":"b","ᄈ":"pp","ᄉ":"s","ᄊ":"ss","ᄋ":"","ᄌ":"j","ᄍ":"jj","ᄎ":"ch","ᄏ":"k","ᄐ":"t","ᄑ":"p","ᄒ":"h"},
        "jung":{"ᅡ":"a","ᅢ":"ae","ᅣ":"ya","ᅤ":"yae","ᅥ":"eo","ᅦ":"e","ᅧ":"yeo","ᅨ":"ye","ᅩ":"o","ᅪ":"wa","ᅫ":"wae","ᅬ":"oe","ᅭ":"yo","ᅮ":"u","ᅯ":"wo","ᅰ":"we","ᅱ":"wi","ᅲ":"yu","ᅳ":"eu","ᅴ":"eui","ᅵ":"i"},
        "jong":{"ᆨ":"k","ᆨᄋ":"g","ᆨᄂ":"ngn","ᆨᄅ":"ngn","ᆨᄆ":"ngm","ᆨᄒ":"kh","ᆩ":"kk","ᆩᄋ":"kg","ᆩᄂ":"ngn","ᆩᄅ":"ngn","ᆩᄆ":"ngm","ᆩᄒ":"kh","ᆪ":"k","ᆪᄋ":"ks","ᆪᄂ":"ngn","ᆪᄅ":"ngn","ᆪᄆ":"ngm","ᆪᄒ":"kch","ᆫ":"n","ᆫᄅ":"ll","ᆬ":"n","ᆬᄋ":"nj","ᆬᄂ":"nn","ᆬᄅ":"nn","ᆬᄆ":"nm","ᆬㅎ":"nch","ᆭ":"n","ᆭᄋ":"nh","ᆭᄅ":"nn","ᆮ":"t","ᆮᄋ":"d","ᆮᄂ":"nn","ᆮᄅ":"nn","ᆮᄆ":"nm","ᆮᄒ":"th","ᆯ":"l","ᆯᄋ":"r","ᆯᄂ":"ll","ᆯᄅ":"ll","ᆰ":"k","ᆰᄋ":"lg","ᆰᄂ":"ngn","ᆰᄅ":"ngn","ᆰᄆ":"ngm","ᆰᄒ":"lkh","ᆱ":"m","ᆱᄋ":"lm","ᆱᄂ":"mn","ᆱᄅ":"mn","ᆱᄆ":"mm","ᆱᄒ":"lmh","ᆲ":"p","ᆲᄋ":"lb","ᆲᄂ":"mn","ᆲᄅ":"mn","ᆲᄆ":"mm","ᆲᄒ":"lph","ᆳ":"t","ᆳᄋ":"ls","ᆳᄂ":"nn","ᆳᄅ":"nn","ᆳᄆ":"nm","ᆳᄒ":"lsh","ᆴ":"t","ᆴᄋ":"lt","ᆴᄂ":"nn","ᆴᄅ":"nn","ᆴᄆ":"nm","ᆴᄒ":"lth","ᆵ":"p","ᆵᄋ":"lp","ᆵᄂ":"mn","ᆵᄅ":"mn","ᆵᄆ":"mm","ᆵᄒ":"lph","ᆶ":"l","ᆶᄋ":"lh","ᆶᄂ":"ll","ᆶᄅ":"ll","ᆶᄆ":"lm","ᆶᄒ":"lh","ᆷ":"m","ᆷᄅ":"mn","ᆸ":"p","ᆸᄋ":"b","ᆸᄂ":"mn","ᆸᄅ":"mn","ᆸᄆ":"mm","ᆸᄒ":"ph","ᆹ":"p","ᆹᄋ":"ps","ᆹᄂ":"mn","ᆹᄅ":"mn","ᆹᄆ":"mm","ᆹᄒ":"psh","ᆺ":"t","ᆺᄋ":"s","ᆺᄂ":"nn","ᆺᄅ":"nn","ᆺᄆ":"nm","ᆺᄒ":"sh","ᆻ":"t","ᆻᄋ":"ss","ᆻᄂ":"tn","ᆻᄅ":"tn","ᆻᄆ":"nm","ᆻᄒ":"th","ᆼ":"ng","ᆽ":"t","ᆽᄋ":"j","ᆽᄂ":"nn","ᆽᄅ":"nn","ᆽᄆ":"nm","ᆽᄒ":"ch","ᆾ":"t","ᆾᄋ":"ch","ᆾᄂ":"nn","ᆾᄅ":"nn","ᆾᄆ":"nm","ᆾᄒ":"ch","ᆿ":"k","ᆿᄋ":"k","ᆿᄂ":"ngn","ᆿᄅ":"ngn","ᆿᄆ":"ngm","ᆿᄒ":"kh","ᇀ":"t","ᇀᄋ":"t","ᇀᄂ":"nn","ᇀᄅ":"nn","ᇀᄆ":"nm","ᇀᄒ":"th","ᇁ":"p","ᇁᄋ":"p","ᇁᄂ":"mn","ᇁᄅ":"mn","ᇁᄆ":"mm","ᇁᄒ":"ph","ᇂ":"t","ᇂᄋ":"h","ᇂᄂ":"nn","ᇂᄅ":"nn","ᇂᄆ":"mm","ᇂᄒ":"t","ᇂᄀ":"k"}
    }
    const cyrillicDict: Record<string, string> = {"а":"a","б":"b","в":"v","г":"g","д":"d","е":"e","ё":"yo","ж":"zh","з":"z","и":"i","й":"j","к":"k","л":"l","м":"m","н":"n","о":"o","п":"p","р":"r","с":"s","т":"t","у":"u","ф":"f","х":"h","ц":"c","ч":"ch","ш":"sh","щ":"sch","ъ":"","ы":"y","ь":"j","э":"e","ю":"yu","я":"ya"}
    const hybrewDict: Record<string, string> = {"א":"a","ב":"b","ג":"g","ד":"d","ה":"h","ו":"v","ז":"z","ח":"ch","ט":"t","י":"y","כ":"k","ך":"k","ל":"l","מ":"m","ם":"m","נ":"n","ן":"n","ס":"s","ע":"a","פ":"p","ף":"p","צ":"ts","ץ":"ts","ק":"k","ר":"r","ש":"sh","ת":"t"}

    // Per-script letter counters used to pick `mostUsed`; key order matters for ties.
    const language: Record<string, number> = {
        'korean': 0,
        'cyrillic': 0,
        'hybrew': 0,
        'roman': 0
    }
    const fullResult:string[] = []

    // Hoisted so the patterns are not rebuilt for every character.
    // (No `|` inside the classes: within [...] it would match a literal pipe.)
    const hangulSyllable = /[\uac00-\ud7a3]/
    const hangulCompatJamo = /[\u3131-\u3163]/
    const romanLetter = /[a-zA-Z]/

    for(const text of texts){
        let result = ''
        for(const char of text){

            //hangul: precomposed syllable -> initial + medial (+ final) jamo
            if(hangulSyllable.test(char)){
                const [cho, jung, jong] = char.normalize('NFD')
                result += koreanDict.cho[cho] ?? cho
                result += koreanDict.jung[jung] ?? jung
                if(jong !== undefined){
                    result += koreanDict.jong[jong] ?? jong
                }
                language.korean++
                continue
            }

            //hangul: standalone compatibility jamo (NFD leaves these untouched,
            //so use the compatibility decomposition to reach the dictionary keys)
            if(hangulCompatJamo.test(char)){
                const jamo = char.normalize('NFKD')
                result += koreanDict.cho[jamo] ?? koreanDict.jung[jamo] ?? koreanDict.jong[jamo] ?? char
                language.korean++
                continue
            }

            //cyrillic (compare against undefined: 'ъ' legitimately maps to '')
            const cyrillic = cyrillicDict[char]
            if(cyrillic !== undefined){
                result += cyrillic
                language.cyrillic++
                continue
            }

            //hybrew
            const hybrew = hybrewDict[char]
            if(hybrew !== undefined){
                result += hybrew
                language.hybrew++
                continue
            }

            //roman
            if(romanLetter.test(char)){
                language.roman++
            }

            // Roman letters and everything that has no mapping (whitespace,
            // digits, punctuation, other scripts) are kept as-is.
            result += char
        }
        fullResult.push(result)
    }

    const mostUsed = Object.keys(language).reduce((a, b) => language[a] > language[b] ? a : b)

    return {
        'result': fullResult,
        'mostUsed': mostUsed
    }
}
|
||||
@@ -474,6 +474,26 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
|
||||
currentTokens += await tokenizer.tokenizeChat(chat)
|
||||
}
|
||||
|
||||
if(db.officialplugins.romanizer){
|
||||
const romanizer = await import('../plugins/romanizer')
|
||||
const r = romanizer.romanizer(chats.map((v) => {
|
||||
return v.content
|
||||
}))
|
||||
|
||||
for(let i=0;i<chats.length;i++){
|
||||
const pchat = cloneDeep(chats[i])
|
||||
pchat.content = r.result[i]
|
||||
if(await tokenizer.tokenizeChat(chats[i]) > await tokenizer.tokenizeChat(pchat)){
|
||||
chats[i] = pchat
|
||||
}
|
||||
}
|
||||
|
||||
unformated.postEverything.push({
|
||||
role: 'system',
|
||||
content: `user and assistant are chatting with romanized ${r.mostUsed}, but always respond with ${r.mostUsed} with ${r.mostUsed} letters.`
|
||||
})
|
||||
}
|
||||
|
||||
if(nowChatroom.supaMemory && db.supaMemoryType !== 'none'){
|
||||
const sp = await supaMemory(chats, currentTokens, maxContextTokens, currentChat, nowChatroom, tokenizer, {
|
||||
asHyper: db.supaMemoryType !== 'subModel' && db.hypaMemory
|
||||
|
||||
@@ -360,6 +360,7 @@ export interface Database{
|
||||
plugins: RisuPlugin[]
|
||||
officialplugins: {
|
||||
automark?: boolean
|
||||
romanizer?: boolean
|
||||
}
|
||||
currentPluginProvider: string
|
||||
zoomsize:number
|
||||
|
||||
Reference in New Issue
Block a user