[feat] strongban

This commit is contained in:
kwaroran
2023-12-06 18:21:06 +09:00
parent 1fac40df54
commit a3dba9f306
5 changed files with 64 additions and 10 deletions

View File

@@ -164,4 +164,52 @@ export class ChatTokenizer {
/**
 * Encodes `data` with this module's `encode` helper and resolves with its result.
 *
 * @param data - Text to encode.
 * @returns Whatever `encode` resolves to, passed through unchanged
 *          (presumably an indexable list of numeric token ids — confirm against `encode`).
 */
export async function tokenizeNum(data:string) {
    return await encode(data)
}
/**
 * Writes a bias of -100 into `bias` for the leading token of every spelling variant of
 * `data` combined with a punctuation character.
 *
 * For each variant of `data` (as given, trimmed, upper-cased, lower-cased, first character
 * upper-cased, first character lower-cased) and each punctuation character, both
 * `variant + punctuation` and `punctuation + variant` are tokenized with `tokenizeNum`.
 * The first token of each result is banned unless it is also the first token of a
 * punctuation character on its own, so bare punctuation is never banned.
 *
 * The derived token ids are cached in `localStorage` under `'strongBan_' + data`.
 * Only the ids are cached, never the caller's map, so a cached result for one `bias`
 * cannot leak entries into (or drop entries from) another.
 *
 * @param data - Text to ban. An empty string bans nothing.
 * @param bias - Map of token id to bias value (presumably the logit-bias map sent to the
 *               model — confirm against callers). It is mutated in place.
 * @returns The same `bias` object that was passed in, with the ban entries added.
 */
export async function strongBan(data:string, bias:{[key:number]:number}):Promise<{[key:number]:number}> {
    const banBias = -100

    // An empty string has no first character to re-case and yields no tokens to ban.
    if(data.length === 0){
        return bias
    }

    const cacheKey = 'strongBan_' + data

    // Cache read is best-effort: a missing, corrupt or legacy entry (older versions stored
    // the whole bias object instead of an id list) simply falls through to a recompute.
    try {
        const raw = localStorage.getItem(cacheKey)
        if(raw !== null){
            const cached:unknown = JSON.parse(raw)
            if(Array.isArray(cached) && cached.every((id) => typeof id === 'number')){
                for(const id of cached){
                    bias[id] = banBias
                }
                return bias
            }
        }
    } catch (error) {
        console.warn('strongBan cache read failed', error)
    }

    const startedAt = performance.now()
    const initialSize = Object.keys(bias).length

    // Destructuring a string iterates by code point, so a first character outside the
    // BMP is re-cased as a whole instead of being cut in the middle of a surrogate pair.
    const [head = ''] = data
    const tail = data.slice(head.length)

    // A Set drops duplicate variants (e.g. already-lower-case input) so they are
    // not tokenized more than once.
    const variants = new Set([
        data,
        data.trim(),
        data.toLocaleUpperCase(),
        data.toLocaleLowerCase(),
        head.toLocaleUpperCase() + tail,
        head.toLocaleLowerCase() + tail,
    ])
    const banChars = " !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~“”‘’«»「」…–―※"

    // Leading token of each punctuation character on its own; these must stay usable.
    const protectedTokens = new Set<number>()
    for(const char of banChars){
        const encoded = await tokenizeNum(char)
        if(encoded.length > 0){
            protectedTokens.add(encoded[0])
        }
    }

    const banned = new Set<number>()
    for(const char of banChars){
        for(const variant of variants){
            for(const text of [variant + char, char + variant]){
                const encoded = await tokenizeNum(text)
                if(encoded.length > 0 && !protectedTokens.has(encoded[0])){
                    banned.add(encoded[0])
                }
            }
        }
    }

    for(const id of banned){
        bias[id] = banBias
    }

    console.log('strongBan', performance.now() - startedAt)
    console.log('added', Object.keys(bias).length - initialSize)

    // Cache write is best-effort too: a full or unavailable storage must not lose the result.
    try {
        localStorage.setItem(cacheKey, JSON.stringify([...banned]))
    } catch (error) {
        console.warn('strongBan cache write failed', error)
    }
    return bias
}