[feat] strongban

2023-12-06 18:21:06 +09:00
parent 1fac40df54
commit a3dba9f306
5 changed files with 64 additions and 10 deletions
--- a/src/lang/en.ts
+++ b/src/lang/en.ts
@@ -48,7 +48,7 @@ export const languageEnglish = {
        loreName: "name of the lore. it dosen't effects the Ai.",
        loreActivationKey: "If one of the activation key exists in context, the lore will be activated and prompt will go in. seperated by commas.",
        loreorder: "If insert Order is higher, it will effect the model more, and it will more lessly cuted when activated lore are many.",
-        bias:"bias is a key-value data which modifies the likelihood of string appearing.\nit can be -100 to 100, higher values will be more likely to appear, and lower values will be more unlikely to appear \nWarning: if the tokenizer is wrong, it not work properly.",
+        bias:"bias is a key-value data which modifies the likelihood of string appearing.\nit can be -100 to 100, higher values will be more likely to appear, and lower values will be more unlikely to appear. \nAdditionaly, if its set to -101, it would work as 'strong ban word' for some models. \nWarning: if the tokenizer is wrong, it not work properly.",
        emotion: "Emotion Images option shows image depending at character's emotion which is analized by character's response. you must input emotion name as words *(like joy, happy, fear and etc.)* .emotion named **neutral** will be default emotion if it exists. must be more then 3 images to work properly.",
        imggen: "After analyzing the chat, apply the prompt to {{slot}}.",
        regexScript: "Regex Script is a custom script that replaces string that matches IN to OUT.\n\nThere four type options."
--- a/src/lang/ko.ts
+++ b/src/lang/ko.ts
@@ -175,7 +175,7 @@ export const languageKorean = {
        loreName: "로어의 이름입니다. AI에 영향을 주지 않습니다.",
        loreActivationKey: "활성화 키 중 하나가 컨텍스트에 존재하면 해당 로어가 활성화됩니다. 쉼표로 구분된 활성화를 구분하세요.",
        loreorder: "순서가 높을수록 모델에 더 많은 영향을 미치며, 활성화된 로어가 많을 때 잘리지 않습니다.",
-        bias:"바이어스는 문자열이 나타날 가능성을 수정하는 키-값 데이터로, -100에서 100까지 가능하며 값이 클수록 나타날 가능성이 높고, 값이 작을수록 나타날 가능성이 낮습니다 \n경고: 토큰라이저가 잘못되면 제대로 작동하지 않습니다.",
+        bias:"바이어스는 문자열이 나타날 가능성을 수정하는 키-값 데이터로, -100에서 100까지 가능하며 값이 클수록 나타날 가능성이 높고, 값이 작을수록 나타날 가능성이 낮습니다 \n경고: 토크나이저가 잘못되면 제대로 작동하지 않습니다. 추가적으로, -101로 설정하면 일부 모델에서는 '강력한 단어 밴'으로 작동합니다",
        emotion: "감정 이미지 옵션은 캐릭터의 반응으로 분석된 캐릭터의 감정에 따라 이미지를 표시합니다. 감정 이름은 단어 *(예시: joy, happy, fear 등)* 로 입력해야 하며, **neutral** 이라는 이름의 감정이 존재하면 기본 감정이 됩니다. 제대로 작동하려면 이미지가 3개 이상이어야 합니다.",
        imggen: "채팅을 분석한 후 프롬프트를 {{slot}}에 적용합니다.",
        experimental: "실험적 기능입니다. 불안정할 수 있습니다.",
--- a/src/lib/Setting/Pages/BotSettings.svelte
+++ b/src/lib/Setting/Pages/BotSettings.svelte
@@ -484,11 +484,11 @@
        <span class="text-textcolor mb-2 mt-4">{language.formatingOrder} <Help key="formatOrder"/></span>
        <DropList bind:list={$DataBase.formatingOrder} />
    {/if}
-    <Arcodion styled name="Bias" help="bias">
+    <Arcodion styled name="Bias " help="bias">
        <table class="contain w-full max-w-full tabler">
            <tr>
-                <th class="font-medium w-1/2">Bias</th>
-                <th class="font-medium w-1/3">{language.value}</th>
+                <th class="font-medium">Bias</th>
+                <th class="font-medium">{language.value}</th>
                <th>
                    <button class="font-medium cursor-pointer hover:text-green-500 w-full flex justify-center items-center" on:click={() => {
                        let bia = $DataBase.bias
@@ -504,11 +504,11 @@
            {/if}
            {#each $DataBase.bias as bias, i}
                <tr>
-                    <td class="font-medium truncate w-1/2">
+                    <td class="font-medium truncate">
                        <TextInput bind:value={$DataBase.bias[i][0]} size="lg" fullwidth/>
                    </td>
-                    <td class="font-medium truncate w-1/3">
-                        <NumberInput bind:value={$DataBase.bias[i][1]} max={100} min={-100} size="lg" fullwidth/>
+                    <td class="font-medium truncate">
+                        <NumberInput bind:value={$DataBase.bias[i][1]} max={100} min={-101} size="lg" fullwidth/>
                    </td>
                    <td>
                        <button class="font-medium flex justify-center items-center h-full cursor-pointer hover:text-green-500 w-full" on:click={() => {
@@ -522,6 +522,7 @@
        </table>
    </Arcodion>

+
    {#if !$DataBase.promptTemplate}
        <div class="flex items-center mt-4">
            <Check bind:check={$DataBase.promptPreprocess} name={language.promptPreprocess}/>
--- a/src/ts/process/request.ts
+++ b/src/ts/process/request.ts
@@ -9,7 +9,7 @@ import { sleep } from "../util";
 import { createDeep } from "./deepai";
 import { hubURL } from "../characterCards";
 import { NovelAIBadWordIds, stringlizeNAIChat } from "./models/nai";
-import { tokenizeNum } from "../tokenizer";
+import { strongBan, tokenizeNum } from "../tokenizer";
 import { runLocalModel } from "./models/local";
 import { risuChatParser } from "../parser";
 import { SignatureV4 } from "@smithy/signature-v4";
@@ -230,10 +230,15 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'

            for(let i=0;i<biasString.length;i++){
                const bia = biasString[i]
+                if(bia[1] === -101){
+                    bias = await strongBan(bia[0], bias)
+                    continue
+                }
                const tokens = await tokenizeNum(bia[0])
        
                for(const token of tokens){
                    bias[token] = bia[1]
+
                }
            }

@@ -780,7 +785,7 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'
                'prompt': proompt,
                presence_penalty: arg.PresensePenalty || (db.PresensePenalty / 100),
                frequency_penalty: arg.frequencyPenalty || (db.frequencyPenalty / 100),
-                logit_bias: bias,
+                logit_bias: {},
                max_tokens: maxTokens,
                stop: stopStrings,
                temperature: temperature,
--- a/src/ts/tokenizer.ts
+++ b/src/ts/tokenizer.ts
@@ -164,4 +164,52 @@ export class ChatTokenizer {
+/**
+ * Encodes `data` with the `encode` function in scope and returns the
+ * encoded result unchanged.
+ *
+ * NOTE(review): callers index the result and use the elements as numeric
+ * token ids; the exact array type returned by `encode` is not visible
+ * here - confirm before relying on array-specific methods.
+ */
 export async function tokenizeNum(data:string) {
    const encoded = await encode(data)
    return encoded
+}
+
+/**
+ * Adds "strong ban" entries for `data` to `bias` and returns `bias`.
+ *
+ * Several spellings of `data` are generated (as given, trimmed, upper case,
+ * lower case, first character upper/lower cased). Each spelling is tokenized
+ * with every character of a fixed punctuation list appended and prepended,
+ * and the FIRST token of each result is given a bias of -100, unless that
+ * token is also the first token of one of the punctuation characters on its
+ * own (so punctuation itself is never banned).
+ *
+ * The computed token ids are cached in localStorage under
+ * `'strongBan_' + data`. Only the ids produced for `data` are cached, and on
+ * a cache hit they are merged INTO the caller's `bias`, so entries the
+ * caller accumulated before this call are preserved.
+ *
+ * NOTE(review): the cache key does not include the tokenizer. If `encode`
+ * depends on the selected model, cached ids may be stale after a model
+ * switch - confirm and extend the key if so.
+ *
+ * @param data - word or phrase to ban; an empty string is a no-op.
+ * @param bias - token-id to bias map; mutated in place.
+ * @returns the same `bias` object that was passed in.
+ */
+export async function strongBan(data:string, bias:{[key:number]:number}) {
+
+    // Nothing to ban, and `data[0]` below would be undefined for ''.
+    if(data.length === 0){
+        return bias
+    }
+
+    const cacheKey = 'strongBan_' + data
+    const cached = localStorage.getItem(cacheKey)
+    if(cached){
+        try{
+            const parsed:unknown = JSON.parse(cached)
+            // Entries written in the older format (a whole bias map, i.e. a
+            // plain object) are not arrays; they are ignored and recomputed.
+            if(Array.isArray(parsed)){
+                for(const token of parsed){
+                    if(typeof token === 'number'){
+                        bias[token] = -100
+                    }
+                }
+                return bias
+            }
+        }
+        catch(error){
+            // Corrupted cache entry: fall through and recompute it.
+        }
+    }
+
+    const startedAt = performance.now()
+    const sizeBefore = Object.keys(bias).length
+
+    // De-duplicated so identical spellings are not tokenized repeatedly.
+    const variants = Array.from(new Set([
+        data,
+        data.trim(),
+        data.toLocaleUpperCase(),
+        data.toLocaleLowerCase(),
+        data[0].toLocaleUpperCase() + data.slice(1),
+        data[0].toLocaleLowerCase() + data.slice(1),
+    ]))
+
+    const banChars = " !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~“”‘’«»「」…–―※"
+
+    // First token of each punctuation character alone; these stay allowed.
+    const unbanTokens = new Set<number>()
+    for(const char of banChars){
+        const encodedChar = await tokenizeNum(char)
+        if(encodedChar.length > 0){
+            unbanTokens.add(encodedChar[0])
+        }
+    }
+
+    const bannedTokens = new Set<number>()
+    for(const char of banChars){
+        for(const alt of variants){
+            // Punctuation after the word, then punctuation before the word.
+            for(const text of [alt + char, char + alt]){
+                const encoded = await tokenizeNum(text)
+                if(encoded.length > 0 && !unbanTokens.has(encoded[0])){
+                    bannedTokens.add(encoded[0])
+                }
+            }
+        }
+    }
+
+    const bannedList = Array.from(bannedTokens)
+    for(const token of bannedList){
+        bias[token] = -100
+    }
+
+    console.log('strongBan', performance.now() - startedAt)
+    console.log('added', Object.keys(bias).length - sizeBefore)
+
+    // The cache is best-effort: a full or unavailable storage must not
+    // abort the request that triggered this call.
+    try{
+        localStorage.setItem(cacheKey, JSON.stringify(bannedList))
+    }
+    catch(error){
+        console.warn('strongBan: could not cache result', error)
+    }
+    return bias
 }