Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { computed, inject } from 'vue';
import { getTipTapEditorStrings } from '../TipTapEditorStrings';
import { sanitizePastedHTML } from '../utils/markdown';

export function useToolbarActions(emit) {
const editor = inject('editor', null);
Expand Down Expand Up @@ -135,35 +136,48 @@ export function useToolbarActions(emit) {
};

/**
 * Pastes the system clipboard into the editor, preferring rich content.
 *
 * Clipboard items are checked in order. The first item offering `text/html` is
 * cleaned with `sanitizePastedHTML` and inserted; otherwise the first item
 * offering `text/plain` is inserted with Windows line endings normalized.
 * If the async Clipboard `read()` API is missing, or no item carries text,
 * the paste falls back to `handlePasteNoFormat` instead of silently doing nothing.
 * If reading the clipboard throws, the localized "clipboard access failed"
 * message is inserted instead.
 *
 * @returns {Promise<void>}
 */
const handlePaste = async () => {
  // `editor` is injected with a `null` default, so guard the ref itself as well as its value.
  if (!editor?.value) return;

  try {
    if (navigator.clipboard?.read) {
      const items = await navigator.clipboard.read();

      for (const item of items) {
        if (item.types.includes('text/html')) {
          const htmlBlob = await item.getType('text/html');
          const cleaned = sanitizePastedHTML(await htmlBlob.text());

          editor.value.chain().focus().insertContent(cleaned).run();
          return;
        }

        if (item.types.includes('text/plain')) {
          const textBlob = await item.getType('text/plain');
          // Same line-ending normalization as the plain-text paste action.
          const text = (await textBlob.text()).replace(/\r\n?/g, '\n');

          if (text) {
            editor.value.chain().focus().insertContent(text).run();
          }
          return;
        }
      }
    }
  } catch (err) {
    editor.value.chain().focus().insertContent(clipboardAccessFailed$()).run();
    return;
  }

  // Reached only when `read()` is unsupported or no clipboard item exposed text:
  // fall back to the plain-text path, which reports its own failures.
  await handlePasteNoFormat();
};

/**
 * Pastes the clipboard's plain-text content into the editor.
 *
 * Does nothing when no editor is available or the clipboard text is empty.
 * If reading the clipboard throws, the localized "clipboard access failed"
 * message is inserted instead.
 *
 * @returns {Promise<void>}
 */
const handlePasteNoFormat = async () => {
  // `editor` is injected with a `null` default, so guard the ref itself as well as its value.
  if (!editor?.value) return;

  try {
    const text = await navigator.clipboard.readText();
    if (!text) return;

    // Normalize Windows (\r\n) and bare carriage-return (\r) line endings to \n.
    const normalized = text.replace(/\r\n?/g, '\n');

    editor.value.chain().focus().insertContent(normalized).run();
  } catch (err) {
    editor.value.chain().focus().insertContent(clipboardAccessFailed$()).run();
  }
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,67 @@ export const paramsToMathMd = ({ latex }) => {
return `$$${latex || ''}$$`;
};

/**
 * Removes `mso-*` declarations from an element's inline style, dropping the
 * attribute entirely when nothing else is left.
 */
function stripOfficeStyleDeclarations(el) {
  if (!el.hasAttribute('style')) return;

  const kept = (el.getAttribute('style') || '')
    .split(';')
    .map(declaration => declaration.trim())
    .filter(declaration => declaration && !declaration.toLowerCase().startsWith('mso-'))
    .join('; ');

  if (kept) {
    el.setAttribute('style', kept);
  } else {
    el.removeAttribute('style');
  }
}

/**
 * Removes `Mso*` class names from an element, dropping the attribute entirely
 * when nothing else is left.
 */
function stripOfficeClassNames(el) {
  if (!el.hasAttribute('class')) return;

  const kept = el
    .getAttribute('class')
    .split(/\s+/)
    .filter(name => name && !/^Mso/i.test(name))
    .join(' ');

  if (kept) {
    el.setAttribute('class', kept);
  } else {
    el.removeAttribute('class');
  }
}

/**
 * Moves lists that ended up inside a strike-through element (<s>, <strike>, <del>)
 * to directly after that element, so the strike-through does not bleed into the list.
 */
function hoistListsOutOfStrikethrough(doc) {
  doc.querySelectorAll('s, strike, del').forEach(strikeEl => {
    // Only move the outermost lists: a list nested in another list inside the
    // strike element travels with its ancestor, which keeps the nesting intact.
    const outermostLists = Array.from(strikeEl.querySelectorAll('ul, ol')).filter(list => {
      const enclosingList = list.parentElement.closest('ul, ol');
      return !enclosingList || !strikeEl.contains(enclosingList);
    });

    // Insert each list after the previously moved one (not always right after the
    // strike element), otherwise several lists would come out in reverse order.
    let anchor = strikeEl;
    outermostLists.forEach(list => {
      anchor.parentNode.insertBefore(list, anchor.nextSibling);
      anchor = list;
    });
  });
}

/**
 * Moves every list that is a direct child of a list item to the end of that item,
 * keeping the relative order of sibling nested lists.
 */
function moveNestedListsToEndOfItems(doc) {
  doc.querySelectorAll('ul > li, ol > li').forEach(item => {
    Array.from(item.children)
      // `tagName` is upper-case for HTML elements regardless of how the markup was written.
      .filter(child => child.tagName === 'UL' || child.tagName === 'OL')
      .forEach(nestedList => item.appendChild(nestedList));
  });
}

/**
 * Normalizes HTML pasted from external editors before it is inserted into the editor.
 *
 * Per the author's review notes it started out targeting Microsoft Word markup and
 * was extended to cover paste issues seen with Google Docs and LibreOffice
 * (strike-through bleed, nested list normalization), hence the general name.
 *
 * Steps:
 *  1. Drop conditional comments (`<!--[if ...]> ... <![endif]-->`) and
 *     namespaced tags (`w:`, `m:`, `o:`, `v:`).
 *  2. Strip `mso-*` style declarations and `Mso*` class names.
 *  3. Hoist lists out of strike-through elements.
 *  4. Move nested lists to the end of their parent list item.
 *
 * NOTE: this is a formatting clean-up, not a security sanitizer — nothing here
 * removes scripts or event-handler attributes.
 *
 * @param {string} html - Raw HTML read from the clipboard.
 * @returns {string} The cleaned HTML (the parsed document body's inner HTML),
 *   or an empty string for empty/missing input.
 */
export function sanitizePastedHTML(html) {
  if (!html) return '';

  const withoutOfficeMarkup = html
    .replace(/<!--\[if.*?endif\]-->/gis, '')
    .replace(/<\/?(w|m|o|v):[^>]*>/gis, '');

  const doc = new DOMParser().parseFromString(withoutOfficeMarkup, 'text/html');

  doc.querySelectorAll('*').forEach(el => {
    stripOfficeStyleDeclarations(el);
    stripOfficeClassNames(el);
  });
  hoistListsOutOfStrikethrough(doc);
  moveNestedListsToEndOfItems(doc);

  return doc.body.innerHTML;
}

/**
* Pre-processes a raw Markdown string to convert custom syntax into HTML tags
* that Tiptap's extensions can understand. This is our custom "loader".
Expand Down Expand Up @@ -87,6 +148,5 @@ export function preprocessMarkdown(markdown) {
return `<span data-latex="${params.latex}"></span>`;
});

// Use marked.js to parse the rest of the markdown
return marked(processedMarkdown);
}