Add code block highlighting and new quote marking

This commit is contained in:
kwaroran
2024-07-02 05:51:39 +09:00
parent f38b88e208
commit 0e8e72e680
4 changed files with 50 additions and 216 deletions

View File

@@ -44,6 +44,7 @@
"fflate": "^0.8.1",
"gpt-3-encoder": "^1.1.4",
"gpt3-tokenizer": "^1.1.5",
"highlight.js": "^11.9.0",
"html-to-image": "^1.11.11",
"isomorphic-dompurify": "^1.13.0",
"jszip": "^3.10.1",

9
pnpm-lock.yaml generated
View File

@@ -92,6 +92,9 @@ importers:
gpt3-tokenizer:
specifier: ^1.1.5
version: 1.1.5
highlight.js:
specifier: ^11.9.0
version: 11.9.0
html-to-image:
specifier: ^1.11.11
version: 1.11.11
@@ -2024,6 +2027,10 @@ packages:
resolution: {integrity: sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==}
hasBin: true
highlight.js@11.9.0:
resolution: {integrity: sha512-fJ7cW7fQGCYAkgv4CPfwFHrfd/cLS4Hau96JuJ+ZTOWhjnhoeN1ub1tFmALm/+lW5z4WCAuAV9bm05AP0mS6Gw==}
engines: {node: '>=12.0.0'}
hosted-git-info@2.8.9:
resolution: {integrity: sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==}
@@ -5617,6 +5624,8 @@ snapshots:
he@1.2.0: {}
highlight.js@11.9.0: {}
hosted-git-info@2.8.9: {}
hosted-git-info@4.1.0:

View File

@@ -9,23 +9,43 @@ import { CurrentChat, SizeStore, selectedCharID } from './stores';
import { calcString } from './process/infunctions';
import { findCharacterbyId, parseKeyValue, sfc32, sleep, uuidtoNumber } from './util';
import { getInlayImage, writeInlayImage } from './process/files/image';
import { risuFormater } from './plugins/automark';
import { getModuleLorebooks } from './process/modules';
import { HypaProcesser } from './process/memory/hypamemory';
import { generateAIImage } from './process/stableDiff';
import { requestChatData } from './process/request';
import type { OpenAIChat } from './process';
import { alertInput, alertNormal } from './alert';
import hljs from 'highlight.js/lib/core'
import hljavascript from 'highlight.js/lib/languages/javascript';
import hlpython from 'highlight.js/lib/languages/python';
import hlcss from 'highlight.js/lib/languages/css';
import hlhtml from 'highlight.js/lib/languages/xml';
import hllua from 'highlight.js/lib/languages/lua';
import 'highlight.js/styles/atom-one-dark.min.css'
hljs.registerLanguage('javascript', hljavascript);
hljs.registerLanguage('python', hlpython);
hljs.registerLanguage('css', hlcss);
hljs.registerLanguage('html', hlhtml);
hljs.registerLanguage('lua', hllua);
const mconverted = markdownit({
html: true,
breaks: true,
linkify: false
linkify: false,
typographer: true,
quotes: '\u{E9b0}\u{E9b1}\u{E9b2}\u{E9b3}', //placeholder characters to convert to real quotes
// Highlighter callback handed to markdown-it for fenced code blocks.
// `str` is the raw code, `lang` is the language tag written after the fence.
// Returns a complete <pre class="hljs"><code>…</code></pre> element when the
// language is registered with hljs; otherwise returns '' so that the caller
// falls back to its own rendering of the block.
highlight: function (str, lang) {
    if (lang && hljs.getLanguage(lang)) {
        try {
            // Use the options-object call form: the positional
            // highlight(lang, code, ignoreIllegals) form is deprecated in highlight.js.
            const highlighted = hljs.highlight(str, { language: lang, ignoreIllegals: true }).value
            return '<pre class="hljs"><code>' + highlighted + '</code></pre>';
        } catch (__) {
            // Deliberately best-effort: a highlighter failure must not break
            // rendering, so fall through to the '' fallback below.
        }
    }
    return ''
}
})
mconverted.disable(['code'])
DOMPurify.addHook("uponSanitizeElement", (node: HTMLElement, data) => {
if (data.tagName === "iframe") {
const src = node.getAttribute("src") || "";
@@ -44,6 +64,9 @@ DOMPurify.addHook("uponSanitizeAttribute", (node, data) => {
case 'class':{
if(data.attrValue){
data.attrValue = data.attrValue.split(' ').map((v) => {
if(v.startsWith('hljs')){
return v
}
return "x-risu-" + v
}).join(' ')
}
@@ -60,6 +83,14 @@ DOMPurify.addHook("uponSanitizeAttribute", (node, data) => {
}
})
/**
 * Renders markdown with `mconverted`, then substitutes the four private-use
 * placeholder characters (U+E9B0..U+E9B3) found in the rendered output with
 * opening/closing `<mark>` tags.
 *
 * @param data markdown source text
 * @returns the rendered HTML string with the placeholders replaced
 */
function renderMarkdown(data:string){
    // [placeholder pattern, replacement markup] — applied in this order
    const substitutions:[RegExp, string][] = [
        [/\uE9b0/gu, '<mark risu-mark="quote2">'],
        [/\uE9b1/gu, '</mark>'],
        [/\uE9b2/gu, '<mark risu-mark="quote1">'],
        [/\uE9b3/gu, '</mark>'],
    ]
    let rendered = mconverted.render(data)
    for(const [placeholder, markup] of substitutions){
        rendered = rendered.replace(placeholder, markup)
    }
    return rendered
}
export const assetRegex = /{{(raw|img|video|audio|bg|emotion|asset|video-img)::(.+?)}}/g
@@ -180,8 +211,7 @@ export async function ParseMarkdown(data:string, charArg:(character|simpleCharac
data = encodeStyle(data)
if(mode === 'normal'){
data = risuFormater(data)
data = mconverted.render(data)
data = renderMarkdown(data)
}
return decodeStyle(DOMPurify.sanitize(data, {
ADD_TAGS: ["iframe", "style", "risu-style", "x-em"],
@@ -190,7 +220,7 @@ export async function ParseMarkdown(data:string, charArg:(character|simpleCharac
}
export function postTranslationParse(data:string){
let lines = risuFormater(data).split('\n')
let lines = data.split('\n')
for(let i=0;i<lines.length;i++){
const trimed = lines[i].trim()
@@ -199,12 +229,12 @@ export function postTranslationParse(data:string){
}
}
data = mconverted.render(lines.join('\n'))
data = renderMarkdown(lines.join('\n'))
return data
}
export function parseMarkdownSafe(data:string) {
return DOMPurify.sanitize(mconverted.render(data), {
return DOMPurify.sanitize(renderMarkdown(data), {
FORBID_TAGS: ["a", "style"],
FORBID_ATTR: ["style", "href", "class"]
})
@@ -2285,7 +2315,7 @@ export function applyMarkdownToNode(node: Node) {
if (node.nodeType === Node.TEXT_NODE) {
const text = node.textContent;
if (text) {
let markdown = mconverted.render(text);
let markdown = renderMarkdown(text);
if (markdown !== text) {
const span = document.createElement('span');
span.innerHTML = markdown;

View File

@@ -1,206 +0,0 @@
// Characters that, when they are the first character of a text segment, make
// risuFormater leave that segment without any quote marking.
// NOTE(review): one entry is an empty string ('') which can never equal a single
// character — presumably a character that was lost in an encoding step; confirm.
const excludesDat = ['<','>','{','}','[',']','(',')','-',':',';','…','—','','_','*','+','/','\\','|','!','?','.',',',' ']
// Characters treated as "not a word character" by risuFormater's isNotCharacter
// helper when it inspects the neighbours of a single quote / apostrophe.
// NOTE(review): several entries are empty strings ('') and therefore match nothing —
// presumably lost full-width / typographic characters; confirm against the original file.
const symbols = ['<','>','{','}','[',']','(',')','-',':',';','…','—','','_','*','+','/','\\','|','!','?','.',',',' ', '\n', '。', '、', '', '', '', '', '', '', '', '【', '】', '「', '」', '『', '』', '“', '”', '', '', '《', '》', '〈', '〉', '', '', '«', '»', '‟', '„']
const selfClosingTags = [
    'br','hr','img','input','meta','link','base','area','col','command','embed','keygen','param','source','track','wbr',
    //self closing tags defined by HTML5
    '!',
    //for doctype <!DOCTYPE html> and comment <!-- -->
    'user', 'bot', 'char'
    //special tags for user, bot, and char
]

/**
 * Tells whether the text that follows '<' in a tag belongs to a tag that has no
 * closing counterpart (an entry of `selfClosingTags`).
 *
 * The tag name is compared exactly (case-insensitively), so `colgroup` is not
 * mistaken for `col` and `bottom` is not mistaken for `bot`. The '!' entry is
 * the only one matched as a prefix, because doctype and comment markup carry no
 * separator after it (`!DOCTYPE`, `!--`).
 *
 * @param dat tag content without the leading '<', e.g. `img src="a.png"` or `br/`
 * @returns true when the tag is one of the self-closing / special tags
 */
const checkSelfClosingTag = (dat:string): boolean => {
    //only the head of the string is needed; every listed tag name is shorter than 10 characters
    const head = dat.substring(0, 10).toLowerCase()
    //tag name = everything up to the first whitespace, '/' or '>'
    const tagName = /^[^\s\/>]*/.exec(head)?.[0] ?? ''
    for(const tag of selfClosingTags){
        const matched = tag === '!' ? head.startsWith(tag) : tagName === tag
        if(matched){
            return true
        }
    }
    return false
}
/**
 * Wraps quoted passages of plain text in `<mark risu-mark="quote1|quote2">` tags.
 *
 * Pass 1 splits the input into typed segments: plain text (''), 'newline',
 * 'html-chunk' (a complete tag or open…close pair), 'html-inner' / 'html-tag'
 * (tags still being read) and 'code-block' (``` fenced text).
 * Pass 2 copies every non-plain segment verbatim and scans plain segments for
 * double quotes (marked "quote2") and single quotes (marked "quote1").
 * A plain segment is copied unmarked when its first character is in `excludesDat`,
 * when it starts like a numbered item ("1." … "9."), when it starts with '<',
 * or when it ends with '>' or '}'.
 *
 * @param dat text to format
 * @returns the formatted text, trimmed of leading/trailing whitespace
 */
export function risuFormater(dat:string){
    const lines:[string,string][] = [['','']] // [type, content]
    let htmlType = 0 // 0: not inside tag, 1: closing tag, 2: opening tag
    for(let i=0;i<dat.length;i++){
        const getLastLine = () => {
            return lines[lines.length-1] ?? [
                'not-found', ''
            ]
        }
        //html tag handling
        if(dat[i] === '<' && getLastLine()[0] !== 'code-block'){
            lines.push(['html-tag',''])
            if(dat[i+1] === '/'){
                htmlType = 1
            }
            else{
                htmlType = 2
            }
        }

        if(dat[i] === '>' && getLastLine()[0] === 'html-tag'){
            const pop = lines.pop()
            const tagAttr = pop[1].substring(1).trim()
            if(htmlType === 1){
                const pop2 = lines.pop() //probably html-inner
                const chunk = pop2[1] + pop[1] + '>'
                //lines is empty when a stray closing tag consumed the very first segment;
                //store the chunk as its own segment instead of dropping it (and everything after it)
                if(lines.length === 0 || getLastLine()[0] === ''){
                    lines.push(['html-chunk',chunk])
                    lines.push(['',''])
                }
                else{
                    getLastLine()[1] += chunk
                }
                continue
            }
            else if(checkSelfClosingTag(tagAttr)){
                const chunk = pop[1] + '>'
                if(getLastLine()[0] === ''){
                    lines.push(['html-chunk',chunk])
                    lines.push(['',''])
                }
                else{
                    getLastLine()[1] += chunk
                }
                continue
            }
            else{
                lines.push(['html-inner',pop[1]])
            }
            htmlType = 0
        }

        //code block handling
        //a fence is recognised both in plain text (opens a block) and inside a
        //code block (closes it); previously the closing case was unreachable,
        //so a block never ended and all following text was swallowed into it
        const lastType = getLastLine()[0]
        if(dat[i] === '`' && dat[i+1] === '`' && dat[i+2] === '`' && (lastType === '' || lastType === 'code-block')){
            if(lastType === 'code-block'){
                getLastLine()[1] += '```'
                lines.push(['',''])
            }
            else{
                lines.push(['code-block','```'])
            }
            i += 2
            continue
        }

        if(dat[i] === '\n' && getLastLine()[0] === ''){
            lines.push(['newline','\n'])
            lines.push(['',''])
        }
        else if(lines[lines.length-1]){
            lines[lines.length-1][1] += dat[i]
        }
    }

    let result = ''
    for(let i=0;i<lines.length;i++){
        //only plain-text segments are scanned for quotes
        if(lines[i][0] !== ''){
            result += lines[i][1]
            continue
        }

        let line = lines[i][1] ??''
        let isNumbered = false
        let endMarked = false
        if(excludesDat.includes(line[0]) || (line[1] === '.' && ['1','2','3','4','5','6','7','8','9'].includes(line[0]))){
            isNumbered = true
        }
        if(line.endsWith('>') || line.endsWith('}') || line.startsWith('<')){
            endMarked = true
        }

        if(isNumbered || endMarked){
            result += line
            continue
        }

        //stack of partially built chunks; index `depth` is the innermost open quote
        let depth = 0
        let depthChunk:string[] = ['']
        let depthChunkType:string[] = ['']

        //spaces for detection
        line = ' ' + line + ' '

        const isNotCharacter = (t:string) => {
            return symbols.includes(t)
        }

        for(let j=0;j<line.length;j++){
            switch(line[j]){
                case '"':
                case '“':
                case '”':{
                    if(depthChunkType[depth] === '"'){
                        depthChunkType.pop()
                        const pop = depthChunk.pop()
                        depth--
                        depthChunk[depth] += `<mark risu-mark="quote2">${pop}${line[j]}</mark>`
                    }
                    else{
                        depthChunkType.push('"')
                        depthChunk.push(line[j])
                        depth++
                    }
                    break
                }
                //NOTE(review): the next two labels are empty strings and can never equal a
                //single character — presumably typographic single quotes lost in an encoding step; confirm
                case "'":
                case '':
                case '':{
                    if(depthChunkType[depth] === "'"){
                        //closing candidate: rejected after a symbol, before a word character,
                        //or right after "in" (dropped-g words)
                        if(isNotCharacter(line[j-1]) || !isNotCharacter(line[j+1]) || (line[j-2] === 'i' && line[j-1] === 'n')){
                            //this is not a quote
                            depthChunk[depth] += line[j]
                        }
                        else{
                            depthChunkType.pop()
                            const pop = depthChunk.pop()
                            depth--
                            depthChunk[depth] += `<mark risu-mark="quote1">${pop}${line[j]}</mark>`
                        }
                    }
                    else{
                        //opening candidate: must follow a symbol and precede a word character
                        if(!isNotCharacter(line[j-1]) || isNotCharacter(line[j+1])){
                            //this is not a quote
                            depthChunk[depth] += line[j]
                        }
                        else{
                            depthChunkType.push("'")
                            depthChunk.push(line[j])
                            depth++
                        }
                    }
                    break
                }
                default:{
                    depthChunk[depth] += line[j]
                }
            }
        }

        //quotes left open are emitted unmarked by gluing the pending chunks back together
        let lineResult = ''
        while(depthChunk.length > 0){
            lineResult = depthChunk.pop() + lineResult
        }

        //remove the detection spaces added above
        if(lineResult.startsWith(' ')){
            lineResult = lineResult.substring(1)
        }
        if(lineResult.endsWith(' ')){
            lineResult = lineResult.substring(0,lineResult.length-1)
        }

        result += lineResult
    }

    return result.trim()
}