[feat] strongban

This commit is contained in:
kwaroran
2023-12-06 18:21:06 +09:00
parent 1fac40df54
commit a3dba9f306
5 changed files with 64 additions and 10 deletions

View File

@@ -9,7 +9,7 @@ import { sleep } from "../util";
import { createDeep } from "./deepai";
import { hubURL } from "../characterCards";
import { NovelAIBadWordIds, stringlizeNAIChat } from "./models/nai";
import { tokenizeNum } from "../tokenizer";
import { strongBan, tokenizeNum } from "../tokenizer";
import { runLocalModel } from "./models/local";
import { risuChatParser } from "../parser";
import { SignatureV4 } from "@smithy/signature-v4";
@@ -230,10 +230,15 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'
for(let i=0;i<biasString.length;i++){
const bia = biasString[i]
if(bia[1] === -101){
bias = await strongBan(bia[0], bias)
continue
}
const tokens = await tokenizeNum(bia[0])
for(const token of tokens){
bias[token] = bia[1]
}
}
@@ -780,7 +785,7 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'
'prompt': proompt,
presence_penalty: arg.PresensePenalty || (db.PresensePenalty / 100),
frequency_penalty: arg.frequencyPenalty || (db.frequencyPenalty / 100),
logit_bias: bias,
logit_bias: {},
max_tokens: maxTokens,
stop: stopStrings,
temperature: temperature,

View File

@@ -164,4 +164,52 @@ export class ChatTokenizer {
export async function tokenizeNum(data:string) {
const encoded = await encode(data)
return encoded
}
// Hard-bans a word/phrase in a logit-bias map. Generates casing/trim variants
// of `data`, tokenizes each variant combined with common punctuation on either
// side, and sets the first token of every combination to -100 (the API's
// hard-ban value). Tokens for the bare punctuation characters themselves are
// excluded so ordinary punctuation output is not suppressed.
//
// Results are memoized in localStorage under `strongBan_<data>` because the
// variant expansion performs hundreds of tokenizer calls.
//
// @param data the string to ban; an empty string is a no-op (variant
//             construction indexes data[0])
// @param bias the caller's existing logit-bias map; ban entries are added to it
// @returns the bias map including all hard-banned variant tokens
export async function strongBan(data:string, bias:{[key:number]:number}) {
    // Read the cache exactly once. The previous code called getItem twice
    // (existence check, then parse), which could pass null to JSON.parse if
    // the entry was evicted between the two reads.
    const cached = localStorage.getItem('strongBan_' + data)
    if(cached){
        // Merge cached ban tokens over the caller's bias instead of returning
        // the cached object alone, so bias entries the caller accumulated
        // since the cache was written are not silently dropped.
        return {...bias, ...JSON.parse(cached)}
    }
    // Guard: data[0] below throws on an empty string.
    if(data.length === 0){
        return bias
    }
    const startTime = performance.now()
    const lengthBefore = Object.keys(bias).length
    const charAlt = [
        data,
        data.trim(),
        data.toLocaleUpperCase(),
        data.toLocaleLowerCase(),
        data[0].toLocaleUpperCase() + data.slice(1),
        data[0].toLocaleLowerCase() + data.slice(1),
    ]
    const banChars = " !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~“”‘’«»「」…–―※"
    // First tokens of the bare punctuation characters — these must never be
    // banned, only the fused variant+punctuation tokens.
    const unbanChars:number[] = []
    for(const char of banChars){
        unbanChars.push((await tokenizeNum(char))[0])
    }
    for(const char of banChars){
        for(const alt of charAlt){
            // Ban the leading token of "<variant><punct>" and "<punct><variant>".
            const encoded = await tokenizeNum(alt + char)
            if(encoded.length > 0 && !unbanChars.includes(encoded[0])){
                bias[encoded[0]] = -100
            }
            const encoded2 = await tokenizeNum(char + alt)
            if(encoded2.length > 0 && !unbanChars.includes(encoded2[0])){
                bias[encoded2[0]] = -100
            }
        }
    }
    console.log('strongBan', performance.now() - startTime)
    console.log('added', Object.keys(bias).length - lengthBefore)
    localStorage.setItem('strongBan_' + data, JSON.stringify(bias))
    return bias
}