feat(BlockEditor): add rich paste support with html-to-blocks parsing

- add clipboard.js with htmlToBlocks, blocksToHtml, and blocksToPlainText helpers
- handle single-paragraph html paste as inline splice preserving block type
- handle multi-block html paste by splitting current block and merging head/tail paragraphs
- add onPasteInline and onPasteBlocks props to Block component
- implement handlePasteInline and handlePasteBlocks in BlockEditor
- fallback to plain text insertion when html is absent or yields no blocks
- update README to document clipboard behaviour and new paste handlers
This commit is contained in:
2026-04-25 20:19:32 -04:00
parent 547b975c01
commit 085a779c74
6 changed files with 540 additions and 11 deletions
@@ -0,0 +1,292 @@
// Sérialisation `Block[]` ↔ HTML pour le presse-papier.
//
// `blocksToHtml` produit un HTML structuré qu'un autre éditeur (Word, Google
// Docs, un autre BlockEditor, …) peut interpréter. `htmlToBlocks` fait
// l'inverse : du HTML quelconque (provenance externe ou interne) vers la
// liste de blocs typés.
//
// Module **neutre** — pas d'import React. Utilise le `document` global
// (paste/copy s'exécutent toujours côté client) et `DOMParser` pour le
// parsing.
import { newBlockId } from '../utils/ids.js';
import { inlineToDom, domToInline } from './serialize.js';
import { inlineFromText, inlineToPlainText, normalize } from './types.js';
// Matches heading block type names (`heading_1` … `heading_6`). The capture
// group is the heading level, used to build the `<hN>` tag name.
const HEADING_RE = /^heading_([1-6])$/;
// Upper-case element tag names considered block-level. Tested against
// `tagName` when scanning an element's descendants in `hasBlockDescendant`.
const BLOCK_TAG_RE = /^(P|H[1-6]|UL|OL|BLOCKQUOTE|PRE|HR|FIGURE|DIV|TABLE)$/;
/**
 * Serialize a list of blocks to an HTML string.
 *
 * Consecutive blocks sharing the same list type (`bullet_item`,
 * `numbered_item` or `checklist`) are grouped under a single `<ul>`/`<ol>`.
 * Checklist lists carry a `data-checklist` attribute and each of their items
 * starts with a checkbox `<input>` followed by a single space. Every other
 * block is delegated to `blockToElement`.
 *
 * @param {Array<object>} blocks - Blocks to serialize.
 * @returns {string} The HTML markup, or `''` when `blocks` is not a non-empty
 *   array or when no global `document` is available.
 */
export function blocksToHtml(blocks) {
  if (!Array.isArray(blocks) || blocks.length === 0) return '';
  if (typeof document === 'undefined') return '';

  const listTypes = ['bullet_item', 'numbered_item', 'checklist'];
  const root = document.createElement('div');
  let cursor = 0;

  while (cursor < blocks.length) {
    const first = blocks[cursor];

    // Non-list blocks map one-to-one to an element.
    if (!listTypes.includes(first.type)) {
      root.appendChild(blockToElement(first));
      cursor += 1;
      continue;
    }

    // Start of a run of list items: consume every following block of the
    // same type into one list element.
    const runType = first.type;
    const isChecklistRun = runType === 'checklist';
    const listEl = document.createElement(runType === 'numbered_item' ? 'ol' : 'ul');
    if (isChecklistRun) listEl.setAttribute('data-checklist', '');

    for (; cursor < blocks.length && blocks[cursor].type === runType; cursor += 1) {
      const item = blocks[cursor];
      const li = document.createElement('li');
      if (isChecklistRun) {
        const box = document.createElement('input');
        box.type = 'checkbox';
        if (item.checked) box.setAttribute('checked', '');
        li.appendChild(box);
        li.appendChild(document.createTextNode(' '));
      }
      li.appendChild(inlineToDom(item.content ?? []));
      listEl.appendChild(li);
    }
    root.appendChild(listEl);
  }

  return root.innerHTML;
}
/**
 * Build the DOM element for a single non-list block.
 *
 * Mapping: `heading_N` → `<hN>`, `paragraph` → `<p>`, `quote` →
 * `<blockquote>`, `code` → `<pre><code>`, `divider` → `<hr>`, `image` →
 * `<figure><img>` with an optional `<figcaption>`. Any other type is
 * flattened to a `<p>` holding the plain text of its content.
 *
 * @param {object} block - Block to convert; `content` may be absent.
 * @returns {Element} The newly created element.
 */
function blockToElement(block) {
  const inlineContent = block.content ?? [];

  // Create <tagName> and fill it with the block's inline content.
  const withInline = tagName => {
    const el = document.createElement(tagName);
    el.appendChild(inlineToDom(inlineContent));
    return el;
  };

  const headingMatch = HEADING_RE.exec(block.type);
  if (headingMatch) return withInline(`h${headingMatch[1]}`);

  switch (block.type) {
    case 'paragraph':
      return withInline('p');
    case 'quote':
      return withInline('blockquote');
    case 'code': {
      const pre = document.createElement('pre');
      pre.appendChild(withInline('code'));
      return pre;
    }
    case 'divider':
      return document.createElement('hr');
    case 'image': {
      const figure = document.createElement('figure');
      const image = document.createElement('img');
      image.setAttribute('src', block.src || '');
      if (block.alt) image.setAttribute('alt', block.alt);
      figure.appendChild(image);
      if (block.caption) {
        const caption = document.createElement('figcaption');
        caption.textContent = block.caption;
        figure.appendChild(caption);
      }
      return figure;
    }
    default: {
      // Unknown type: flatten to a plain-text paragraph.
      const fallback = document.createElement('p');
      fallback.textContent = inlineToPlainText(inlineContent);
      return fallback;
    }
  }
}
/**
 * Flatten a list of blocks to plain text, one line per block, for the
 * complementary `text/plain` clipboard MIME type.
 *
 * Dividers become `---`, images use their `alt` (falling back to `caption`),
 * and every other block contributes the plain text of its inline content.
 *
 * @param {Array<object>} blocks - Blocks to flatten.
 * @returns {string} Lines joined with `\n`, or `''` when `blocks` is not a
 *   non-empty array.
 */
export function blocksToPlainText(blocks) {
  if (!Array.isArray(blocks) || blocks.length === 0) return '';

  const lines = [];
  for (const block of blocks) {
    switch (block.type) {
      case 'divider':
        lines.push('---');
        break;
      case 'image':
        lines.push(block.alt || block.caption || '');
        break;
      default:
        lines.push(inlineToPlainText(block.content ?? []));
    }
  }
  return lines.join('\n');
}
/**
 * Parse an HTML string into a list of blocks.
 *
 * The markup is parsed with `DOMParser` as `text/html` and the resulting
 * `<body>` is handed to `parseChildren`, which fills the returned array.
 *
 * @param {string} html - HTML markup, from any source.
 * @returns {Array<object>} The parsed blocks; empty when `html` is falsy or
 *   when `DOMParser` is not available.
 */
export function htmlToBlocks(html) {
  const canParse = typeof DOMParser !== 'undefined';
  if (!html || !canParse) return [];

  const blocks = [];
  const parsed = new DOMParser().parseFromString(html, 'text/html');
  parseChildren(parsed.body, blocks);
  return blocks;
}
/**
 * Walk the direct children of `node` and append the blocks they represent to
 * `out`, in document order.
 *
 * Inline content (text nodes and elements without block-level descendants)
 * is accumulated and emitted as a `paragraph` block whenever a block-level
 * element, a top-level `<br>`, or the end of the children is reached.
 *
 * Whitespace handling: a whitespace-only text node at the start of a
 * paragraph or right before a block boundary is dropped. One that sits
 * between two pieces of inline content is kept as a single space, so
 * `<b>foo</b> <i>bar</i>` yields "foo bar" rather than "foobar".
 *
 * Elements listed in `BLOCK_TAG_RE` that have no dedicated handler and no
 * block-level descendant (in practice `<div>` / `<table>`) become their own
 * paragraph instead of being merged with neighbouring inline content, so
 * sibling `<div>` lines stay separate paragraphs.
 *
 * @param {Node} node - Parent whose `childNodes` are scanned.
 * @param {Array<object>} out - Output array, mutated in place.
 * @returns {void}
 */
function parseChildren(node, out) {
  // Inline nodes accumulated for the paragraph currently being built.
  let pending = [];
  // True when inter-element whitespace was seen after some inline content;
  // it is materialised as one space only if more inline content follows.
  let spaceBeforeNext = false;

  // Emit the pending inline nodes (if any) as a paragraph and reset state.
  const flush = () => {
    const nodes = pending;
    pending = [];
    spaceBeforeNext = false;
    if (nodes.length === 0) return;
    const content = normalize(nodes);
    if (content.length === 0) return;
    out.push({ id: newBlockId(), type: 'paragraph', content });
  };

  // Append inline nodes to the pending paragraph, restoring a separating
  // space when whitespace preceded them.
  const pushInline = nodes => {
    if (nodes.length === 0) return;
    if (spaceBeforeNext) {
      pending.push({ type: 'text', text: ' ' });
      spaceBeforeNext = false;
    }
    pending.push(...nodes);
  };

  // Emit a typed block unless its inline content is empty.
  const pushBlock = (type, content) => {
    if (content.length > 0) out.push({ id: newBlockId(), type, content });
  };

  for (const child of node.childNodes) {
    if (child.nodeType === 3 /* TEXT_NODE */) {
      const text = child.nodeValue;
      if (!text) continue;
      if (text.trim()) {
        pushInline([{ type: 'text', text }]);
      } else if (pending.length > 0) {
        // Whitespace between inline siblings is significant; whitespace
        // between blocks (nothing pending) is only source formatting.
        spaceBeforeNext = true;
      }
      continue;
    }
    if (child.nodeType !== 1 /* ELEMENT_NODE */) continue;

    const tag = child.tagName;

    const heading = /^H([1-6])$/.exec(tag);
    if (heading) {
      flush();
      pushBlock(`heading_${heading[1]}`, domToInline(child));
      continue;
    }
    if (tag === 'P') {
      flush();
      pushBlock('paragraph', domToInline(child));
      continue;
    }
    if (tag === 'UL' || tag === 'OL') {
      flush();
      parseList(child, tag === 'OL', out);
      continue;
    }
    if (tag === 'BLOCKQUOTE') {
      flush();
      pushBlock('quote', domToInline(child));
      continue;
    }
    if (tag === 'PRE') {
      flush();
      // Code keeps its raw text; inline markup inside <pre> is discarded.
      const codeEl = child.querySelector('code') || child;
      const text = codeEl.textContent || '';
      out.push({ id: newBlockId(), type: 'code', content: inlineFromText(text) });
      continue;
    }
    if (tag === 'HR') {
      flush();
      out.push({ id: newBlockId(), type: 'divider' });
      continue;
    }
    if (tag === 'FIGURE') {
      flush();
      // A <figure> without an <img> produces no block.
      const img = child.querySelector('img');
      if (img) {
        const cap = child.querySelector('figcaption');
        out.push({
          id: newBlockId(),
          type: 'image',
          src: img.getAttribute('src') || '',
          alt: img.getAttribute('alt') || '',
          caption: cap?.textContent?.trim() || '',
        });
      }
      continue;
    }
    if (tag === 'IMG') {
      flush();
      out.push({
        id: newBlockId(),
        type: 'image',
        src: child.getAttribute('src') || '',
        alt: child.getAttribute('alt') || '',
        caption: '',
      });
      continue;
    }
    if (tag === 'BR') {
      // A line break at this level ends the current paragraph.
      flush();
      continue;
    }

    // Google Docs / Word wrappers or layout <div>s: when the element holds
    // at least one block-level descendant, recurse to recover the structure.
    if (hasBlockDescendant(child)) {
      flush();
      parseChildren(child, out);
      continue;
    }

    const inline = domToInline(child);
    if (BLOCK_TAG_RE.test(tag)) {
      // Block-level container holding only inline content (e.g. one <div>
      // per line): it forms a paragraph of its own.
      flush();
      pushInline(inline);
      flush();
      continue;
    }

    // Plain inline element: extend the paragraph being accumulated.
    pushInline(inline);
  }
  flush();
}
/**
 * Tell whether `el` contains, at any depth, an element whose tag name
 * matches `BLOCK_TAG_RE`. `el` itself is not tested.
 *
 * @param {Element} el - Element whose descendants are inspected.
 * @returns {boolean} `true` as soon as one matching descendant is found.
 */
function hasBlockDescendant(el) {
  return Array.from(el.children).some(
    kid => BLOCK_TAG_RE.test(kid.tagName) || hasBlockDescendant(kid),
  );
}
/**
 * Convert the `<li>` children of a list element into blocks appended to
 * `out`.
 *
 * An item becomes a `checklist` block when the list carries a
 * `data-checklist` attribute or when the `<li>` has a direct
 * `<input type="checkbox">` child (Markdown task-list style); otherwise it is
 * a `numbered_item` (ordered list) or a `bullet_item`. The block model has
 * no nesting: sub-lists found directly inside a `<li>` are emitted as
 * sibling items right after the item that contained them.
 *
 * Note: the checkbox and the sub-lists are detached from the `<li>` before
 * its inline content is read, so the passed DOM is mutated.
 *
 * @param {Element} listEl - The `<ul>` / `<ol>` element.
 * @param {boolean} ordered - Whether plain items are `numbered_item`s.
 * @param {Array<object>} out - Output array, mutated in place.
 * @returns {void}
 */
function parseList(listEl, ordered, out) {
  const isChecklist = listEl.hasAttribute('data-checklist');
  for (const li of listEl.children) {
    if (li.tagName !== 'LI') continue;

    const checkbox = li.querySelector(':scope > input[type="checkbox"]');
    const isChecklistItem = isChecklist || !!checkbox;
    let checked = false;
    if (checkbox) {
      checked = Boolean(checkbox.checked || checkbox.hasAttribute('checked'));
      // `blocksToHtml` (and Markdown renderers) put whitespace between the
      // checkbox and the label. Trim it so it is not handed to
      // `domToInline` as leading text of the item on every round trip.
      const after = checkbox.nextSibling;
      if (after && after.nodeType === 3 /* TEXT_NODE */ && after.nodeValue) {
        after.nodeValue = after.nodeValue.replace(/^\s+/, '');
      }
      checkbox.remove();
    }

    // Detach direct sub-lists so they do not leak into this item's inline
    // content; they are processed after the item itself.
    const subLists = Array.from(li.children).filter(
      c => c.tagName === 'UL' || c.tagName === 'OL',
    );
    for (const sub of subLists) sub.remove();

    const content = domToInline(li);
    // Empty plain items are skipped; an empty checklist item is kept.
    if (content.length > 0 || isChecklistItem) {
      if (isChecklistItem) {
        out.push({ id: newBlockId(), type: 'checklist', content, checked });
      } else if (ordered) {
        out.push({ id: newBlockId(), type: 'numbered_item', content });
      } else {
        out.push({ id: newBlockId(), type: 'bullet_item', content });
      }
    }

    for (const sub of subLists) parseList(sub, sub.tagName === 'OL', out);
  }
}
@@ -170,6 +170,33 @@ function walk(node, marks, out) {
if (highlight) added.push({ type: 'highlight', color: highlight });
}
// Styles CSS inline : Google Docs / Word produisent massivement des
// <span style="font-weight:700"> / <b style="font-weight:normal"> au lieu
// de <strong>/<em>. On lit ces styles pour préserver le formatage à la
// collaboration externe.
const style = node.style;
if (style) {
const fw = style.fontWeight;
const isBoldStyle = fw === 'bold' || fw === 'bolder' || (fw && parseInt(fw, 10) >= 600);
const isNormalStyle = fw === 'normal' || (fw && parseInt(fw, 10) > 0 && parseInt(fw, 10) < 600);
// Un <b style="font-weight:normal"> annule le bold du tag (cas Google Docs).
if (simple === 'bold' && isNormalStyle) {
added.length = 0;
} else if (!simple && isBoldStyle && !added.some(m => m.type === 'bold')) {
added.push({ type: 'bold' });
}
if (style.fontStyle === 'italic' && !added.some(m => m.type === 'italic')) {
added.push({ type: 'italic' });
}
const decoLine = style.textDecorationLine || style.textDecoration || '';
if (decoLine.includes('underline') && !added.some(m => m.type === 'underline')) {
added.push({ type: 'underline' });
}
if (decoLine.includes('line-through') && !added.some(m => m.type === 'strike')) {
added.push({ type: 'strike' });
}
}
const nextMarks = added.length ? [...marks, ...added] : marks;
for (const child of node.childNodes) {
walk(child, nextMarks, out);