Peter Johnmeyer
Peter Johnmeyer

Reputation: 109

How would one extract HTML client-side from a child shadow DOM node, while including other shadow-root elements within?

As the title asks: how should one go about extracting the complete client-side HTML of an encapsulated shadow DOM node that itself contains further nested child shadow roots?

For reference: other questions on Stack Overflow I've visited which had answers that did not help: 1 2 3 4 5 6

I initially used the following code, but it failed to scrape any of the nested shadow DOMs:

/**
 * Builds an HTML string for `element` by descending through open shadow roots.
 *
 * Known limitation (this is the behaviour the question is about): only nodes
 * that are themselves shadow hosts are expanded. Any other node is emitted via
 * `nodeValue` / `outerHTML`, and `outerHTML` does not include the shadow trees
 * of descendants. A host's own light-DOM children are not visited either.
 *
 * @param {Node} element - Node to serialize.
 * @returns {string} The collected markup.
 */
const getDeepShadowDomHtml = (element) => {
    // Returns the markup for one node instead of appending to a shared buffer.
    const serialize = (node) => {
        const root = node.shadowRoot;

        // Not a shadow host: fall back to the node's own text or outer markup.
        if (!root) {
            return node.nodeValue || node.outerHTML || '';
        }

        const tag = node.tagName.toLowerCase();
        const attributeText = Array.from(node.attributes)
            .map((attribute) => ` ${attribute.name}="${attribute.value}"`)
            .join('');

        // Only the shadow tree's children are walked for a host element.
        let inner = '';
        for (const child of Array.from(root.childNodes)) {
            inner += serialize(child);
        }

        return `<${tag}${attributeText}>${inner}</${tag}>`;
    };

    return serialize(element);
};

/**
 * Walks down a chain of nested open shadow roots.
 *
 * `selector` is split on spaces; each piece is looked up with `querySelector`
 * inside the shadow root of the element found by the previous piece, starting
 * from `startElement`. Despite the name, the result is the last matched
 * element, not a ShadowRoot.
 *
 * @param {Element|null} startElement - Outermost shadow host.
 * @param {string} selector - Space-separated selectors, one per shadow level.
 * @returns {Element|null} The element matched by the last selector, or null
 *   when a step has no match or its host has no open shadow root. With an
 *   empty selector, `startElement` is returned unchanged.
 */
const findNestedShadowRoot = (startElement, selector) => {
    let element = startElement;
    const selectors = selector.split(' ').filter(Boolean);

    for (const sel of selectors) {
        // A missing element or a host without an open shadow root ends the
        // walk with null instead of throwing on `.shadowRoot.querySelector`.
        if (!element || !element.shadowRoot) {
            return null;
        }
        element = element.shadowRoot.querySelector(sel);
    }

    return element;
};

// Look up the outermost shadow host first: `document.querySelector` returns
// null when nothing matches, and that case should end up in the "not found"
// branch below rather than being passed on as a start element.
const shadowHost = document.querySelector('shadow-host-selector');
const behaviorTabElement = shadowHost
    ? findNestedShadowRoot(shadowHost, 'first-shadow-selector second-shadow-selector #behaviourtab')
    : null;

if (behaviorTabElement) {
    const shadowDomContent = getDeepShadowDomHtml(behaviorTabElement);
    console.log(shadowDomContent); // Verify content before download

    // Offer the collected markup as a file download through a temporary link.
    const blob = new Blob([shadowDomContent], { type: 'text/html' });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = 'behaviorTabShadowDOMContent.html';
    document.body.appendChild(a);
    try {
        a.click();
    } finally {
        // Clean up even if the click throws: detach the helper link from the
        // page and release the object URL.
        a.remove();
        URL.revokeObjectURL(url);
    }
} else {
    console.log("The #behaviourtab element was not found.");
}

Upvotes: 1

Views: 58

Answers (1)

Peter Johnmeyer
Peter Johnmeyer

Reputation: 109

The following code worked for me. Comments are included in the code.

// jQuery for good measure.
// NOTE: nothing in the code shown below references jQuery, so this load is
// optional for the extraction itself.
const script = document.createElement('script');
script.src = 'https://code.jquery.com/jquery-latest.min.js';
document.head.appendChild(script);

/**
 * Serializes the children of `element` to an HTML string, descending into
 * every open shadow root found along the way.
 *
 * For a shadow host, the shadow tree's markup is emitted first, followed by
 * the host's own (light DOM) children, both directly inside the host's tags
 * with no wrapper element. Only shadow roots reachable through
 * `node.shadowRoot` are included.
 *
 * @param {ShadowRoot|Element|DocumentFragment|null} element - Root whose
 *   child nodes are serialized; typically a host's `shadowRoot`.
 * @returns {string} The serialized markup, or '' when `element` is null or
 *   undefined (e.g. the host has no open shadow root).
 */
const captureShadowDom = (element) => {
  // `host.shadowRoot` is null for hosts without an open shadow root; treat
  // that as "nothing to serialize" instead of throwing on `.childNodes`.
  if (!element) {
    return '';
  }

  // Elements that never have an end tag; emitting e.g. `</br>` would be
  // parsed back as an additional <br>.
  const VOID_TAGS = new Set([
    'area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'frame',
    'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source',
    'track', 'wbr',
  ]);

  // Elements whose text content must be written verbatim (escaping would
  // corrupt scripts and style sheets).
  const RAW_TEXT_TAGS = new Set([
    'script', 'style', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript',
    'plaintext',
  ]);

  const escapeText = (text) =>
    text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

  const escapeAttribute = (value) =>
    String(value).replace(/&/g, '&amp;').replace(/"/g, '&quot;');

  // Concatenates the markup of every child node of `parent`.
  const serializeChildren = (parent, isRawText) => {
    let html = '';
    for (const child of parent.childNodes) {
      html += processNode(child, isRawText);
    }
    return html;
  };

  const processNode = (node, isRawText) => {
    if (node.nodeType === Node.ELEMENT_NODE) {
      const tagName = node.tagName.toLowerCase();
      let nodeHtml = `<${tagName}`;

      // Captures element attributes, escaping values so quotes and
      // ampersands cannot break out of the attribute.
      for (const attr of node.attributes) {
        nodeHtml += ` ${attr.name}="${escapeAttribute(attr.value)}"`;
      }
      nodeHtml += '>';

      if (VOID_TAGS.has(tagName)) {
        return nodeHtml;
      }

      // Checks for shadow DOM to recursively capture its content
      if (node.shadowRoot) {
        nodeHtml += serializeChildren(node.shadowRoot, false);
      }

      // Captures the (light DOM) children of this element
      nodeHtml += serializeChildren(node, RAW_TEXT_TAGS.has(tagName));

      nodeHtml += `</${tagName}>`;
      return nodeHtml;
    }

    // Comments keep their delimiters so they do not turn into visible text.
    if (node.nodeType === Node.COMMENT_NODE) {
      return `<!--${node.nodeValue}-->`;
    }

    // Text or other node types
    const text = node.nodeValue || '';
    return isRawText ? text : escapeText(text);
  };

  // Starts processing at the root's children
  return serializeChildren(element, false);
};

/**
 * Finds the further nested element by walking down a chain of open shadow
 * roots.
 *
 * `selector` is split on spaces; each piece is looked up with `querySelector`
 * inside the shadow root of the element found by the previous piece, starting
 * from `startElement`. Despite the name, the result is the last matched
 * element, not a ShadowRoot.
 *
 * @param {Element|null} startElement - Outermost shadow host.
 * @param {string} selector - Space-separated selectors, one per shadow level.
 * @returns {Element|null} The element matched by the last selector, or null
 *   when a step has no match or its host has no open shadow root. With an
 *   empty selector, `startElement` is returned unchanged.
 */
const findNestedShadowRoot = (startElement, selector) => {
  let element = startElement;
  const selectors = selector.split(' ').filter(Boolean);

  for (const sel of selectors) {
    // A missing element or a host without an open shadow root ends the walk
    // with null instead of throwing on `.shadowRoot.querySelector`.
    if (!element || !element.shadowRoot) {
      return null;
    }
    element = element.shadowRoot.querySelector(sel);
  }

  return element;
};

// Update with the correct shadow DOM hierarchy.
// The outermost host is looked up separately: `document.querySelector`
// returns null when nothing matches, and that case should be reported below
// rather than being passed on as a start element.
const shadowHost = document.querySelector('shadow-host-selector');
const behaviorTabElement = shadowHost
  ? findNestedShadowRoot(shadowHost, 'first-shadow-selector second-shadow-selector main-selector')
  : null;

console.log(behaviorTabElement); // Check if the element is found

if (!behaviorTabElement) {
  console.log("The element was not found.");
} else if (!behaviorTabElement.shadowRoot) {
  // `shadowRoot` is null when the element is not a host with an open shadow
  // root, so there is no shadow tree to capture from it.
  console.log("The element was found but has no open shadow root.");
} else {
  const shadowDomContent = captureShadowDom(behaviorTabElement.shadowRoot);
  console.log(shadowDomContent); // Debug output

  // Offer the collected markup as a file download through a temporary link.
  const blob = new Blob([shadowDomContent], {
    type: 'text/html'
  });
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = 'ShadowDOMContent.html';
  document.body.appendChild(a);
  try {
    a.click();
  } finally {
    // Clean up even if the click throws: detach the helper link from the
    // page and release the object URL.
    a.remove();
    URL.revokeObjectURL(url);
  }
}

This may not be the most efficient way to do this — I'm doing it as a hobby rather than professionally — so if anyone has a better working method, feel free to improve on it!

Upvotes: -1

Related Questions