Add code block highlighting and new quote marking
This commit is contained in:
@@ -44,6 +44,7 @@
|
||||
"fflate": "^0.8.1",
|
||||
"gpt-3-encoder": "^1.1.4",
|
||||
"gpt3-tokenizer": "^1.1.5",
|
||||
"highlight.js": "^11.9.0",
|
||||
"html-to-image": "^1.11.11",
|
||||
"isomorphic-dompurify": "^1.13.0",
|
||||
"jszip": "^3.10.1",
|
||||
|
||||
9
pnpm-lock.yaml
generated
9
pnpm-lock.yaml
generated
@@ -92,6 +92,9 @@ importers:
|
||||
gpt3-tokenizer:
|
||||
specifier: ^1.1.5
|
||||
version: 1.1.5
|
||||
highlight.js:
|
||||
specifier: ^11.9.0
|
||||
version: 11.9.0
|
||||
html-to-image:
|
||||
specifier: ^1.11.11
|
||||
version: 1.11.11
|
||||
@@ -2024,6 +2027,10 @@ packages:
|
||||
resolution: {integrity: sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==}
|
||||
hasBin: true
|
||||
|
||||
highlight.js@11.9.0:
|
||||
resolution: {integrity: sha512-fJ7cW7fQGCYAkgv4CPfwFHrfd/cLS4Hau96JuJ+ZTOWhjnhoeN1ub1tFmALm/+lW5z4WCAuAV9bm05AP0mS6Gw==}
|
||||
engines: {node: '>=12.0.0'}
|
||||
|
||||
hosted-git-info@2.8.9:
|
||||
resolution: {integrity: sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==}
|
||||
|
||||
@@ -5617,6 +5624,8 @@ snapshots:
|
||||
|
||||
he@1.2.0: {}
|
||||
|
||||
highlight.js@11.9.0: {}
|
||||
|
||||
hosted-git-info@2.8.9: {}
|
||||
|
||||
hosted-git-info@4.1.0:
|
||||
|
||||
@@ -9,23 +9,43 @@ import { CurrentChat, SizeStore, selectedCharID } from './stores';
|
||||
import { calcString } from './process/infunctions';
|
||||
import { findCharacterbyId, parseKeyValue, sfc32, sleep, uuidtoNumber } from './util';
|
||||
import { getInlayImage, writeInlayImage } from './process/files/image';
|
||||
import { risuFormater } from './plugins/automark';
|
||||
import { getModuleLorebooks } from './process/modules';
|
||||
import { HypaProcesser } from './process/memory/hypamemory';
|
||||
import { generateAIImage } from './process/stableDiff';
|
||||
import { requestChatData } from './process/request';
|
||||
import type { OpenAIChat } from './process';
|
||||
import { alertInput, alertNormal } from './alert';
|
||||
import hljs from 'highlight.js/lib/core'
|
||||
import hljavascript from 'highlight.js/lib/languages/javascript';
|
||||
import hlpython from 'highlight.js/lib/languages/python';
|
||||
import hlcss from 'highlight.js/lib/languages/css';
|
||||
import hlhtml from 'highlight.js/lib/languages/xml';
|
||||
import hllua from 'highlight.js/lib/languages/lua';
|
||||
import 'highlight.js/styles/atom-one-dark.min.css'
|
||||
|
||||
// Only these grammars are registered with the highlight.js core build; fenced code in any
// other language falls back to markdown-it's default (escaped, unhighlighted) rendering.
hljs.registerLanguage('javascript', hljavascript);
hljs.registerLanguage('python', hlpython);
hljs.registerLanguage('css', hlcss);
hljs.registerLanguage('html', hlhtml);
hljs.registerLanguage('lua', hllua);

/**
 * Shared markdown-it instance used by every markdown rendering helper in this module.
 *
 * - `typographer` + `quotes`: smart quotes are emitted as four private-use placeholder
 *   characters so they can be swapped for real quotes afterwards.
 * - `highlight`: fenced code blocks in a registered language are highlighted with
 *   highlight.js. Returning '' tells markdown-it to use its own escaped rendering.
 */
const mconverted = markdownit({
    html: true,
    breaks: true,
    linkify: false,
    typographer: true,
    quotes: '\u{E9b0}\u{E9b1}\u{E9b2}\u{E9b3}', //placeholder characters to convert to real quotes
    highlight: function (str: string, lang: string): string {
        if (lang && hljs.getLanguage(lang)) {
            try {
                // highlight(code, options) is the current signature; the positional
                // highlight(lang, code, ignoreIllegals) form is deprecated since 10.7.
                const highlighted = hljs.highlight(str, { language: lang, ignoreIllegals: true }).value;
                return '<pre class="hljs"><code>' + highlighted + '</code></pre>';
            } catch {
                // Best effort: a highlighter failure must not break rendering,
                // so fall through to markdown-it's default escaping below.
            }
        }
        return ''
    }
})
// Turn off the core 'code' rule of markdown-it.
mconverted.disable(['code'])
|
||||
|
||||
|
||||
|
||||
DOMPurify.addHook("uponSanitizeElement", (node: HTMLElement, data) => {
|
||||
if (data.tagName === "iframe") {
|
||||
const src = node.getAttribute("src") || "";
|
||||
@@ -44,6 +64,9 @@ DOMPurify.addHook("uponSanitizeAttribute", (node, data) => {
|
||||
case 'class':{
|
||||
if(data.attrValue){
|
||||
data.attrValue = data.attrValue.split(' ').map((v) => {
|
||||
if(v.startsWith('hljs')){
|
||||
return v
|
||||
}
|
||||
return "x-risu-" + v
|
||||
}).join(' ')
|
||||
}
|
||||
@@ -60,6 +83,14 @@ DOMPurify.addHook("uponSanitizeAttribute", (node, data) => {
|
||||
}
|
||||
})
|
||||
|
||||
/**
 * Renders markdown with the shared `mconverted` instance, then swaps each quote
 * placeholder character in the resulting HTML for the real quote character plus
 * the opening or closing `<mark>` tag.
 *
 * @param data markdown source text
 * @returns the rendered HTML string with quotes wrapped in `<mark risu-mark="...">`
 */
function renderMarkdown(data:string){
    // [placeholder pattern, replacement markup], applied in this order
    const quoteMarkup:[RegExp, string][] = [
        [/\uE9b0/gu, '<mark risu-mark="quote2">“'],
        [/\uE9b1/gu, '”</mark>'],
        [/\uE9b2/gu, '<mark risu-mark="quote1">‘'],
        [/\uE9b3/gu, '’</mark>'],
    ]

    let html = mconverted.render(data)
    for(const [placeholder, markup] of quoteMarkup){
        html = html.replace(placeholder, markup)
    }
    return html
}
|
||||
|
||||
|
||||
// Matches inline asset placeholders of the form {{kind::argument}}.
// Capture group 1 is the kind (raw, img, video, audio, bg, emotion, asset or video-img);
// capture group 2 is the lazily matched argument up to the first closing "}}".
// NOTE: the regex is global (/g), so it carries lastIndex state when used with exec()/test().
export const assetRegex = /{{(raw|img|video|audio|bg|emotion|asset|video-img)::(.+?)}}/g
|
||||
|
||||
@@ -180,8 +211,7 @@ export async function ParseMarkdown(data:string, charArg:(character|simpleCharac
|
||||
|
||||
data = encodeStyle(data)
|
||||
if(mode === 'normal'){
|
||||
data = risuFormater(data)
|
||||
data = mconverted.render(data)
|
||||
data = renderMarkdown(data)
|
||||
}
|
||||
return decodeStyle(DOMPurify.sanitize(data, {
|
||||
ADD_TAGS: ["iframe", "style", "risu-style", "x-em"],
|
||||
@@ -190,7 +220,7 @@ export async function ParseMarkdown(data:string, charArg:(character|simpleCharac
|
||||
}
|
||||
|
||||
export function postTranslationParse(data:string){
|
||||
let lines = risuFormater(data).split('\n')
|
||||
let lines = data.split('\n')
|
||||
|
||||
for(let i=0;i<lines.length;i++){
|
||||
const trimed = lines[i].trim()
|
||||
@@ -199,12 +229,12 @@ export function postTranslationParse(data:string){
|
||||
}
|
||||
}
|
||||
|
||||
data = mconverted.render(lines.join('\n'))
|
||||
data = renderMarkdown(lines.join('\n'))
|
||||
return data
|
||||
}
|
||||
|
||||
export function parseMarkdownSafe(data:string) {
|
||||
return DOMPurify.sanitize(mconverted.render(data), {
|
||||
return DOMPurify.sanitize(renderMarkdown(data), {
|
||||
FORBID_TAGS: ["a", "style"],
|
||||
FORBID_ATTR: ["style", "href", "class"]
|
||||
})
|
||||
@@ -2285,7 +2315,7 @@ export function applyMarkdownToNode(node: Node) {
|
||||
if (node.nodeType === Node.TEXT_NODE) {
|
||||
const text = node.textContent;
|
||||
if (text) {
|
||||
let markdown = mconverted.render(text);
|
||||
let markdown = renderMarkdown(text);
|
||||
if (markdown !== text) {
|
||||
const span = document.createElement('span');
|
||||
span.innerHTML = markdown;
|
||||
|
||||
@@ -1,206 +0,0 @@
|
||||
|
||||
// First characters that mark a plain-text segment as structural (markup, brackets,
// punctuation, leading space). risuFormater copies such segments through unchanged.
const excludesDat = ['<','>','{','}','[',']','(',')','-',':',';','…','—','–','_','*','+','/','\\','|','!','?','.',',',' ']
// Characters treated as "not part of a word" when deciding whether a single quote
// opens/closes a quotation or is just an apostrophe.
const symbols = ['<','>','{','}','[',']','(',')','-',':',';','…','—','–','_','*','+','/','\\','|','!','?','.',',',' ', '\n', '。', '、', '!', '?', ',', ';', ':', '(', ')', '【', '】', '「', '」', '『', '』', '“', '”', '‘', '’', '《', '》', '〈', '〉', '‹', '›', '«', '»', '‟', '„']

const selfClosingTags = [
    'br','hr','img','input','meta','link','base','area','col','command','embed','keygen','param','source','track','wbr',
    //self closing tags defined by HTML5
    '!',
    //for doctype <!DOCTYPE html> and comment <!-- -->
    'user', 'bot', 'char'
    //special tags for user, bot, and char
]

/**
 * Tells whether the text between '<' and '>' belongs to a tag that never has a closing tag.
 *
 * The tag NAME is compared exactly (case-insensitively) against `selfClosingTags`, so
 * `colgroup` is no longer mistaken for `col`, nor `metadata` for `meta`. Declarations
 * and comments have no tag name and are recognised by their leading '!'.
 *
 * @param dat tag content without the leading '<', e.g. `img src="a.png"` or `br/`
 * @returns true when the tag is self-closing
 */
const checkSelfClosingTag = (dat:string) => {
    const lowered = dat.toLowerCase() //we don't care about the case
    if(lowered.startsWith('!')){
        // <!DOCTYPE html> and <!-- comment -->
        return true
    }
    // the name ends at the first character that cannot be part of a tag name (space, '/', '=', ...)
    const tagName = /^[a-z][a-z0-9-]*/.exec(lowered)?.[0]
    return tagName !== undefined && selfClosingTags.includes(tagName)
}

/**
 * Wraps the quoted passages of one plain-text segment in `<mark>` elements.
 *
 * Double quotes (", “, ”) become `<mark risu-mark="quote2">`; single quotes (', ‘, ’)
 * become `<mark risu-mark="quote1">`. Quotes may nest; an unclosed quote is emitted as
 * plain text. A single quote only opens when preceded by a symbol and followed by a
 * non-symbol, and only closes in the mirrored situation, so apostrophes stay untouched.
 */
const markQuotes = (text:string) => {
    //spaces for detection: every real character gets a neighbour on both sides
    const line = ' ' + text + ' '
    const chunks:string[] = [''] // text collected at each open-quote depth
    const kinds:string[] = ['']  // quote kind that opened each depth ('' for the base level)

    const isNotCharacter = (t:string) => symbols.includes(t)
    const appendText = (t:string) => {
        chunks[chunks.length-1] += t
    }
    const openQuote = (kind:string, quote:string) => {
        kinds.push(kind)
        chunks.push(quote)
    }
    const closeQuote = (mark:string, quote:string) => {
        kinds.pop()
        const inner = chunks.pop() ?? ''
        appendText(`<mark risu-mark="${mark}">${inner}${quote}</mark>`)
    }

    for(let j=0;j<line.length;j++){
        const ch = line.charAt(j)
        const openKind = kinds[kinds.length-1]

        switch(ch){
            case '"':
            case '“':
            case '”':{
                if(openKind === '"'){
                    closeQuote('quote2', ch)
                }
                else{
                    openQuote('"', ch)
                }
                break
            }
            case "'":
            case '‘':
            case '’':{
                const before = line.charAt(j-1)
                const after = line.charAt(j+1)
                if(openKind === "'"){
                    // not a closing quote when it follows a symbol, is followed by a word
                    // character, or directly follows the letters "in"
                    const notClosing = isNotCharacter(before) || !isNotCharacter(after) || (line.charAt(j-2) === 'i' && before === 'n')
                    if(notClosing){
                        appendText(ch)
                    }
                    else{
                        closeQuote('quote1', ch)
                    }
                }
                else{
                    // not an opening quote when it follows a word character or precedes a symbol
                    const notOpening = !isNotCharacter(before) || isNotCharacter(after)
                    if(notOpening){
                        appendText(ch)
                    }
                    else{
                        openQuote("'", ch)
                    }
                }
                break
            }
            default:{
                appendText(ch)
            }
        }
    }

    // unclosed quotes simply fall back to their collected text, in order
    let marked = chunks.join('')

    // remove the detection padding again
    if(marked.startsWith(' ')){
        marked = marked.substring(1)
    }
    if(marked.endsWith(' ')){
        marked = marked.substring(0,marked.length-1)
    }
    return marked
}

/**
 * Marks quoted speech in chat text while leaving HTML and fenced code untouched.
 *
 * Phase 1 splits the input into typed segments: plain text (''), 'newline',
 * 'html-chunk' (a complete element or a self-closing tag), 'html-inner' (an element that
 * is still open), 'html-tag' (a tag that is still being read) and 'code-block'.
 * Phase 2 copies every non-plain segment through verbatim and runs quote marking on
 * plain segments, except those that start with a character from `excludesDat`, start
 * with a digit 1-9 followed by '.', start with '<', or end with '>' or '}'.
 *
 * Fixes compared with the previous implementation:
 * - a closing ``` now really closes the code block (the closing branch used to be
 *   unreachable, so everything after the first fence was treated as code);
 * - a closing tag without a matching open element no longer swallows the preceding plain
 *   text, which could empty the segment list and silently drop the rest of the input.
 *
 * @param dat raw message text
 * @returns the text with quotes wrapped in `<mark risu-mark="quote1|quote2">`, trimmed
 */
export function risuFormater(dat:string){
    const lines:[string,string][] = [['','']] // [type, content]
    let htmlType = 0 // 0: not inside tag, 1: closing tag, 2: opening tag

    const getLastLine = ():[string,string] => {
        return lines[lines.length-1] ?? ['not-found', '']
    }

    // A finished chunk becomes its own segment at top level, otherwise it is
    // appended to the html element (or tag) that is still open.
    const appendHtmlChunk = (chunk:string) => {
        const last = getLastLine()
        if(last[0] === ''){
            lines.push(['html-chunk',chunk])
            lines.push(['',''])
        }
        else{
            last[1] += chunk
        }
    }

    for(let i=0;i<dat.length;i++){
        const char = dat[i]

        //html tag handling
        if(char === '<' && getLastLine()[0] !== 'code-block'){
            lines.push(['html-tag',''])
            htmlType = dat[i+1] === '/' ? 1 : 2
        }

        if(char === '>' && getLastLine()[0] === 'html-tag'){
            const tag = getLastLine()[1]
            lines.pop()

            if(htmlType === 1){
                let chunk = tag + '>'
                const opener = getLastLine()
                // Only fold an open element/tag into the chunk. The plain-text segment
                // before a stray closing tag must stay in place.
                if(opener[0] !== ''){
                    lines.pop()
                    chunk = opener[1] + chunk
                }
                appendHtmlChunk(chunk)
                continue
            }

            if(checkSelfClosingTag(tag.substring(1).trim())){
                appendHtmlChunk(tag + '>')
                continue
            }

            // opening tag of a normal element: keep collecting until its closing tag
            lines.push(['html-inner',tag])
            htmlType = 0
        }

        //code block handling
        if(char === '`' && dat[i+1] === '`' && dat[i+2] === '`'){
            const last = getLastLine()
            if(last[0] === 'code-block'){
                last[1] += '```'
                lines.push(['',''])
                i += 2
                continue
            }
            if(last[0] === ''){
                lines.push(['code-block','```'])
                i += 2
                continue
            }
            // inside html the backticks are ordinary characters
        }

        const last = getLastLine()
        if(char === '\n' && last[0] === ''){
            lines.push(['newline','\n'])
            lines.push(['',''])
        }
        else{
            last[1] += char
        }
    }

    const listDigits = ['1','2','3','4','5','6','7','8','9']
    let result = ''

    for(const [type, content] of lines){
        if(type !== ''){
            result += content
            continue
        }

        const isNumbered = excludesDat.includes(content.charAt(0)) || (content.charAt(1) === '.' && listDigits.includes(content.charAt(0)))
        const endMarked = content.endsWith('>') || content.endsWith('}') || content.startsWith('<')

        result += (isNumbered || endMarked) ? content : markQuotes(content)
    }

    return result.trim()
}
|
||||
Reference in New Issue
Block a user