6:[["$","$Le",null,{}],["$","div",null,{"className":"min-h-screen bg-gray-100 p-6","children":[["$","$Lf",null,{}],["$","script",null,{"type":"application/ld+json","dangerouslySetInnerHTML":{"__html":"{\"@context\":\"https://schema.org\",\"@type\":\"QAPage\",\"mainEntity\":{\"@type\":\"Question\",\"name\":\"extract data from html code\",\"text\":\"

I want to extract data into div tags using BeautifulSoup :

\\n\\n

<div class=\\\"post contentTemplate\\\" itemprop=\\\"text\\\">Data to extract<div class=\\\"clear\\\"></div></div>\\n

\\n\",\"author\":{\"@type\":\"Person\",\"name\":\"Ray En\"},\"upvoteCount\":1,\"answerCount\":2,\"acceptedAnswer\":{\"@type\":\"Answer\",\"text\":\"

You can try something like this:

\\n\\n

from bs4 import BeautifulSoup as bs\\n\\ndata = '<div class=\\\"post contentTemplate\\\" itemprop=\\\"text\\\">Data to extract<div class=\\\"clear\\\"></div></div>'\\nsoup = bs(data)\\nm = soup.findAll(\\\"div\\\", {\\\"class\\\": \\\"post contentTemplate\\\"})\\nfor k in m:\\n    print(k.get_text())\\n

\\n\\n

Output:

\\n\\n

Data to extract\\n

\\n\",\"author\":{\"@type\":\"Person\",\"name\":\"Chiheb Nexus\"},\"upvoteCount\":1}}}"}}],["$","div",null,{"className":"bg-white shadow-md rounded-lg p-6 mb-6 relative","children":[["$","div",null,{"className":"absolute top-4 right-4 flex flex-wrap space-x-2","children":[["$","span","python",{"className":"bg-blue-600 text-white text-sm px-3 py-1 rounded-full","children":["$","$L10",null,{"href":"/discussion/tag/python/1","children":"python"}]}],["$","span","beautifulsoup",{"className":"bg-blue-600 text-white text-sm px-3 py-1 rounded-full","children":["$","$L10",null,{"href":"/discussion/tag/beautifulsoup/1","children":"beautifulsoup"}]}]]}],["$","div",null,{"className":"flex items-center mb-4","children":[["$","img",null,{"src":"https://graph.facebook.com/299769310439731/picture?type=large","alt":"Ray En","className":"w-16 h-16 rounded-full border"}],["$","div",null,{"className":"ml-4","children":[["$","a",null,{"href":"https://stackoverflow.com/users/7630477/ray-en","target":"_blank","rel":"noopener noreferrer","className":"text-lg font-semibold text-blue-600 hover:underline","children":"Ray En"}],["$","p",null,{"className":"text-sm text-gray-500","children":["Reputation: ",83]}]]}]]}],["$","h1",null,{"className":"text-2xl font-bold text-gray-800 mb-4","children":"extract data from html code"}],["$","p",null,{"className":"text-gray-700 mt-4","dangerouslySetInnerHTML":{"__html":"

I want to extract data into div tags using BeautifulSoup :

\n\n

<div class=\"post contentTemplate\" itemprop=\"text\">Data to extract<div class=\"clear\"></div></div>\n

\n"}}],["$","div",null,{"className":"text-gray-600 text-sm mt-4","children":[["$","p",null,{"children":["Upvotes: ",1]}],["$","p",null,{"children":["Views: ",59]}]]}]]}],["$","div",null,{"className":"container mx-auto","children":[["$","h2",null,{"className":"text-2xl font-semibold text-gray-800 mb-6","children":["Answers (",2,")"]}],[["$","div","43295670",{"className":"bg-white shadow-md rounded-lg p-6 mb-6","children":[["$","div",null,{"className":"flex items-center mb-4","children":[["$","img",null,{"src":"https://i.sstatic.net/2l63X.jpg?s=256","alt":"Chiheb Nexus","className":"w-12 h-12 rounded-full border"}],["$","div",null,{"className":"ml-4","children":[["$","a",null,{"href":"https://stackoverflow.com/users/3926995/chiheb-nexus","target":"_blank","rel":"noopener noreferrer","className":"text-lg font-semibold text-blue-600 hover:underline","children":"Chiheb Nexus"}],["$","p",null,{"className":"text-sm text-gray-500","children":["Reputation: ",9257]}]]}]]}],["$","p",null,{"className":"text-gray-700 mb-4","dangerouslySetInnerHTML":{"__html":"

You can try something like this:

\n\n

from bs4 import BeautifulSoup as bs\n\ndata = '<div class=\"post contentTemplate\" itemprop=\"text\">Data to extract<div class=\"clear\"></div></div>'\nsoup = bs(data)\nm = soup.findAll(\"div\", {\"class\": \"post contentTemplate\"})\nfor k in m:\n    print(k.get_text())\n

\n\n

Output:

\n\n

Data to extract\n

\n"}}],["$","div",null,{"className":"text-gray-600 text-sm","children":["$","p",null,{"children":["Upvotes: ",1]}]}]]}],["$","div","43295557",{"className":"bg-white shadow-md rounded-lg p-6 mb-6","children":[["$","div",null,{"className":"flex items-center mb-4","children":[["$","img",null,{"src":"https://i.sstatic.net/K96NO.jpg?s=256","alt":"odradek","className":"w-12 h-12 rounded-full border"}],["$","div",null,{"className":"ml-4","children":[["$","a",null,{"href":"https://stackoverflow.com/users/5348860/odradek","target":"_blank","rel":"noopener noreferrer","className":"text-lg font-semibold text-blue-600 hover:underline","children":"odradek"}],["$","p",null,{"className":"text-sm text-gray-500","children":["Reputation: ",1001]}]]}]]}],["$","p",null,{"className":"text-gray-700 mb-4","dangerouslySetInnerHTML":{"__html":"

you can use the get_text() method. this will extract all text from every div that find_all() finds in the source code.

\n\n

data = [e.get_text() for e in html.find_all('div')]\n

\n\n

when run it returns:

\n\n

[u'Data to extract', u'']\n

\n\n

if you don't want the empty values just filter them out.

\n\n

data = [e.get_text() for e in html.find_all('div') if e.get_text()]\n

\n"}}],["$","div",null,{"className":"text-gray-600 text-sm","children":["$","p",null,{"children":["Upvotes: ",0]}]}]]}]]]}],["$","div",null,{"className":"bg-white shadow-md rounded-lg p-6 mt-6","children":[["$","h2",null,{"className":"text-2xl font-semibold text-gray-800 mb-4","children":"Related Questions"}],["$","ul",null,{"className":"list-disc list-inside","children":[["$","li","62000520",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/62000520","className":"text-blue-600 hover:underline","children":"Extracting HTML data using Python"}]}],["$","li","60133804",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/60133804","className":"text-blue-600 hover:underline","children":"Beautiful soup extract data from HTML sources code?"}]}],["$","li","59776496",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/59776496","className":"text-blue-600 hover:underline","children":"Getting data from complex html tag with Python Beautifulsoup"}]}],["$","li","57692197",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/57692197","className":"text-blue-600 hover:underline","children":"Extract data from html using beautifulsoup"}]}],["$","li","49987479",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/49987479","className":"text-blue-600 hover:underline","children":"Extracting values from HTML with Python"}]}],["$","li","46273600",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/46273600","className":"text-blue-600 hover:underline","children":"Extracting string data from a html source"}]}],["$","li","23601323",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/23601323","className":"text-blue-600 hover:underline","children":"Extract data from html page using Beautifulsoup"}]}],["$","li","36661357",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/36661357","className":"text-blue-600 hover:underline","children":"Extracting specific information from fetched HTML code using python"}]}],["$","li","35251091",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/35251091","className":"text-blue-600 hover:underline","children":"How to extract data form the below HTML code using beautifulsoup?"}]}],["$","li","29239125",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/29239125","className":"text-blue-600 hover:underline","children":"How can I extract data from a html tag using Python?"}]}]]}]]}]]}],["$","$L11",null,{}],["$","$L12",null,{}],["$","$L13",null,{}],["$","$L14",null,{}],["$","$L15",null,{}]]

extract data from html code

Answers (2)

Related Questions