learningequality · AllanOXDi · Dec 4, 2025 · Dec 4, 2025 · Dec 5, 2025 · nucleogenesis
diff --git a/...curation/frontend/shared/views/TipTapEditor/TipTapEditor/composables/useToolbarActions.js b/...curation/frontend/shared/views/TipTapEditor/TipTapEditor/composables/useToolbarActions.js
@@ -1,5 +1,6 @@
 import { computed, inject } from 'vue';
 import { getTipTapEditorStrings } from '../TipTapEditorStrings';
+import { sanitizePastedHTML } from '../utils/markdown';
 
 export function useToolbarActions(emit) {
   const editor = inject('editor', null);
@@ -135,35 +136,48 @@ export function useToolbarActions(emit) {
   };
 
   const handlePaste = async () => {
-    if (editor.value) {
-      try {
-        // Try HTML first
-        const clipboardData = await navigator.clipboard.read();
-        const htmlType = clipboardData[0].types.find(type => type === 'text/html');
-
-        if (htmlType) {
-          const htmlBlob = await clipboardData[0].getType('text/html');
-          const html = await htmlBlob.text();
-          editor.value.chain().focus().insertContent(html).run();
-        } else {
-          // Fall back to plain text
-          handlePasteNoFormat();
+    if (!editor.value) return;
+
+    try {
+      if (navigator.clipboard?.read) {
+        const items = await navigator.clipboard.read();
+
+        for (const item of items) {
+          if (item.types.includes('text/html')) {
+            const htmlBlob = await item.getType('text/html');
+            const html = await htmlBlob.text();
+            const cleaned = sanitizePastedHTML(html);
+
+            editor.value.chain().focus().insertContent(cleaned).run();
+            return;
+          }
+          if (item.types.includes('text/plain')) {
+            const textBlob = await item.getType('text/plain');
+            const text = await textBlob.text();
+
+            editor.value.chain().focus().insertContent(text).run();
+            return;
+          }
         }
-      } catch (err) {
-        editor.value.chain().focus().insertContent(clipboardAccessFailed$()).run();
       }
+    } catch (err) {
+      editor.value.chain().focus().insertContent(clipboardAccessFailed$()).run();
     }
   };
 
   const handlePasteNoFormat = async () => {
-    if (editor.value) {
-      try {
-        // Read plain text from clipboard
-        const text = await navigator.clipboard.readText();
-        editor.value.chain().focus().insertContent(text).run();
-      } catch (err) {
-        editor.value.chain().focus().insertContent(clipboardAccessFailed$()).run();
-      }
+    if (!editor.value) return;
+
+    try {
+      const text = await navigator.clipboard.readText();
+      if (!text) return;
+
+      // Note: this regex was generated with the help of an LLM; it converts CRLF to LF.
+      const normalized = text.replace(/\r\n/g, '\n');
+
+      editor.value.chain().focus().insertContent(normalized).run();
+    } catch (err) {
+      editor.value.chain().focus().insertContent(clipboardAccessFailed$()).run();
     }
   };
 

diff --git a/...uration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/utils/markdown.js b/...uration/contentcuration/frontend/shared/views/TipTapEditor/TipTapEditor/utils/markdown.js
@@ -51,6 +51,67 @@ export const paramsToMathMd = ({ latex }) => {
   return `$$${latex || ''}$$`;
 };
 
+export function sanitizePastedHTML(html) {
+  if (!html) return '';
+  // This code (lines 55 to 66) was generated with the help of an LLM, using the prompt:
+  // "Create a function that sanitizes HTML pasted from Microsoft
+  // Word by removing Word-specific tags, styles, and classes while preserving other formatting."
+  let cleaned = html;
+  cleaned = cleaned.replace(/<!--\[if.*?endif\]-->/gis, '');
+  cleaned = cleaned.replace(/<\/?(w|m|o|v):[^>]*>/gis, '');
+  const parser = new DOMParser();
+  const doc = parser.parseFromString(cleaned, 'text/html');
+  doc.querySelectorAll('*').forEach(el => {
+    if (el.hasAttribute('style')) {
+      const style = el.getAttribute('style') || '';
+      const filtered = style
+        .split(';')
+        .map(s => s.trim())
+        .filter(s => s && !s.toLowerCase().startsWith('mso-'))
+        .join('; ');
+      if (filtered) {
+        el.setAttribute('style', filtered);
+      } else {
+        el.removeAttribute('style');
+      }
+    }
+    if (el.hasAttribute('class')) {
+      const cls = el
+        .getAttribute('class')
+        .split(/\s+/)
+        .filter(c => c && !/^Mso/i.test(c))
+        .join(' ');
+      if (cls) {
+        el.setAttribute('class', cls);
+      } else {
+        el.removeAttribute('class');
+      }
+    }
+  });
+  const strikeElements = doc.querySelectorAll('s, strike, del');
+  strikeElements.forEach(el => {
+    const nestedLists = el.querySelectorAll('ul, ol');
+    if (nestedLists.length > 0) {
+      nestedLists.forEach(list => {
+        el.parentNode.insertBefore(list, el.nextSibling);
+      });
+    }
+  });
+  const lists = doc.querySelectorAll('ul, ol');
+  lists.forEach(list => {
+    const items = list.querySelectorAll(':scope > li');
+    items.forEach(item => {
+      const nestedLists = Array.from(item.children).filter(
+        child => child.tagName === 'UL' || child.tagName === 'OL',
+      );
+      nestedLists.forEach(nestedList => {
+        item.appendChild(nestedList);
+      });
+    });
+  });
+  return doc.body.innerHTML;
+}
+
 /**
  * Pre-processes a raw Markdown string to convert custom syntax into HTML tags
  * that Tiptap's extensions can understand. This is our custom "loader".
@@ -87,6 +148,5 @@ export function preprocessMarkdown(markdown) {
     return `<span data-latex="${params.latex}"></span>`;
   });
 
-  // Use marked.js to parse the rest of the markdown
   return marked(processedMarkdown);
 }