[feat] oaifixer

This commit is contained in:
kwaroran
2023-12-06 06:07:26 +09:00
parent 1aa165deab
commit b1521384d3
6 changed files with 71 additions and 30 deletions

43
src/ts/plugins/fixer.ts Normal file
View File

@@ -0,0 +1,43 @@
/**
 * Writes a bias of -100 into `bias` for a fixed list of token ids.
 *
 * The map is modified in place and the same object is returned, so existing
 * entries for other ids are kept and entries for the listed ids are overwritten.
 *
 * NOTE(review): the ids are presumably the tokens that contain an em dash in
 * the tokenizer used for OpenAI requests, and -100 presumably the strongest
 * suppression a logit-bias map accepts — confirm against the caller.
 *
 * @param bias Map from token id to bias value; mutated by this call.
 * @returns The same `bias` object that was passed in.
 */
export function OaifixEmdash(bias:{[key:number]:number}){
    const suppressed = -100
    const tokenIds = [
        2001, 2345, 8713, 16620, 17223, 22416, 29096, 29472,
        30697, 35192, 38542, 41128, 44603, 49525, 50004, 50617,
        51749, 51757, 55434, 60654, 61311, 63750, 63938, 63977,
        66101, 68850, 71201, 71480, 72318, 76070, 76929, 80078,
        81902, 83872, 84941, 85366, 86319, 87247, 87671, 88958,
        90863, 93830, 96197, 99563,
    ]
    tokenIds.forEach((tokenId) => {
        bias[tokenId] = suppressed
    })
    return bias
}
/**
 * Rewrites a fixed set of commonly mis-generated Korean strings in `text`.
 *
 * Every occurrence of each wrong form is replaced, not just the first one.
 * Pairs are applied in the order listed, each over the result of the previous
 * pair. Matching is plain substring matching, so a wrong form is also
 * replaced when it appears inside a longer word.
 *
 * @param text Text to clean up.
 * @returns The text with all listed wrong forms replaced.
 */
export function OaiFixKorean(text:string): string{
    //tokenizer problem fixes
    // Ordered [wrong, corrected] pairs.
    const replacements: ReadonlyArray<readonly [string, string]> = [
        //commonly wrong english
        ['피츠', '피스'],
        ['스커츠', '스커트'],
        // NOTE(review): maps to the same target as '스커츠'; confirm this is
        // intended rather than a copy-paste slip.
        ['스파츠', '스커트'],
        // The misspelling is the key; the correct spelling '스마트폰' must
        // never be rewritten into '스파트폰'.
        ['스파트폰', '스마트폰'],
        ['스위츠', '스위치'],
        // NOTE(review): broad substring pattern, also matches inside longer
        // phrases that start with '해도 되' — verify this is intended.
        ['해도 되', '해도 돼'],
        ['해도 됩니다', '해도 돼요'],
        ['에레베이터', '엘리베이터'],
        ['에리베이터', '엘리베이터'],
        ['에레바토르', '엘리베이터'],
    ]
    for (const [wrong, corrected] of replacements) {
        // split/join replaces all occurrences and treats both strings
        // literally (no regex or `$` replacement-pattern interpretation).
        text = text.split(wrong).join(corrected)
    }
    return text
}