6:[["$","$Le",null,{}],["$","div",null,{"className":"min-h-screen bg-gray-100 p-6","children":[["$","$Lf",null,{}],["$","script",null,{"type":"application/ld+json","dangerouslySetInnerHTML":{"__html":"{\"@context\":\"https://schema.org\",\"@type\":\"QAPage\",\"mainEntity\":{\"@type\":\"Question\",\"name\":\"Need to extract all links from script tag HTML Python\",\"text\":\"

Basically, I need to parse all src=\\\"\\\" links from all <script> tags in HTML.

\\n\\n

<script src=\\\"path/to/example.js\\\" type=\\\"text/javascript\\\"><\\/script>

\\n\\n

Unfortunately, bs4 cannot do that. Any ideas how I can achieve this?

\\n\",\"author\":{\"@type\":\"Person\",\"name\":\"Razeel\"},\"upvoteCount\":2,\"answerCount\":2,\"acceptedAnswer\":{\"@type\":\"Answer\",\"text\":\"

import requests\\nimport bs4\\ntext = requests.get('http://example.com').text\\nsoup = bs4.BeautifulSoup(text, features='html.parser')\\nscripts = soup.find_all('script')\\nsrcs = [link['src'] for link in scripts if 'src' in link.attrs]\\nprint(srcs)\\n

\\n\",\"author\":{\"@type\":\"Person\",\"name\":\"Alex Hall\"},\"upvoteCount\":4}}}"}}],["$","div",null,{"className":"bg-white shadow-md rounded-lg p-6 mb-6 relative","children":[["$","div",null,{"className":"absolute top-4 right-4 flex flex-wrap space-x-2","children":[["$","span","python",{"className":"bg-blue-600 text-white text-sm px-3 py-1 rounded-full","children":["$","$L10",null,{"href":"/discussion/tag/python/1","children":"python"}]}],["$","span","html",{"className":"bg-blue-600 text-white text-sm px-3 py-1 rounded-full","children":["$","$L10",null,{"href":"/discussion/tag/html/1","children":"html"}]}],["$","span","parsing",{"className":"bg-blue-600 text-white text-sm px-3 py-1 rounded-full","children":["$","$L10",null,{"href":"/discussion/tag/parsing/1","children":"parsing"}]}]]}],["$","div",null,{"className":"flex items-center mb-4","children":[["$","img",null,{"src":"https://www.gravatar.com/avatar/c873275347b0f1c85e137e69fd9d8a71?s=256&d=identicon&r=PG&f=y&so-version=2","alt":"Razeel","className":"w-16 h-16 rounded-full border"}],["$","div",null,{"className":"ml-4","children":[["$","a",null,{"href":"https://stackoverflow.com/users/11561926/razeel","target":"_blank","rel":"noopener noreferrer","className":"text-lg font-semibold text-blue-600 hover:underline","children":"Razeel"}],["$","p",null,{"className":"text-sm text-gray-500","children":["Reputation: ",47]}]]}]]}],["$","h1",null,{"className":"text-2xl font-bold text-gray-800 mb-4","children":"Need to extract all links from script tag HTML Python"}],["$","p",null,{"className":"text-gray-700 mt-4","dangerouslySetInnerHTML":{"__html":"

Basically, I need to parse all src=\"\" links from all &lt;script&gt; tags in HTML.

\n\n

&lt;script src=\"path/to/example.js\" type=\"text/javascript\"&gt;&lt;/script&gt;

\n\n

Unfortunately, bs4 cannot do that. Any ideas how I can achieve this?

\n"}}],["$","div",null,{"className":"text-gray-600 text-sm mt-4","children":[["$","p",null,{"children":["Upvotes: ",2]}],["$","p",null,{"children":["Views: ",1861]}]]}]]}],["$","div",null,{"className":"container mx-auto","children":[["$","h2",null,{"className":"text-2xl font-semibold text-gray-800 mb-6","children":["Answers (",2,")"]}],[["$","div","56347085",{"className":"bg-white shadow-md rounded-lg p-6 mb-6","children":[["$","div",null,{"className":"flex items-center mb-4","children":[["$","img",null,{"src":"https://i.sstatic.net/Z7ztW.jpg?s=256","alt":"QHarr","className":"w-12 h-12 rounded-full border"}],["$","div",null,{"className":"ml-4","children":[["$","a",null,{"href":"https://stackoverflow.com/users/6241235/qharr","target":"_blank","rel":"noopener noreferrer","className":"text-lg font-semibold text-blue-600 hover:underline","children":"QHarr"}],["$","p",null,{"className":"text-sm text-gray-500","children":["Reputation: ",84465]}]]}]]}],["$","p",null,{"className":"text-gray-700 mb-4","dangerouslySetInnerHTML":{"__html":"

I would condense and use script[src] to ensure script has src attribute

\n\n

import requests\nfrom bs4 import BeautifulSoup as bs\nr = requests.get('http://example.com').content\nsoup = bs(r, 'lxml') # 'html.parser' if lxml not installed\nsrcs = [item['src'] for item in soup.select('script[src]')]\n

\n"}}],["$","div",null,{"className":"text-gray-600 text-sm","children":["$","p",null,{"children":["Upvotes: ",1]}]}]]}],["$","div","56343177",{"className":"bg-white shadow-md rounded-lg p-6 mb-6","children":[["$","div",null,{"className":"flex items-center mb-4","children":[["$","img",null,{"src":"https://i.sstatic.net/OQPNY.png?s=256","alt":"Alex Hall","className":"w-12 h-12 rounded-full border"}],["$","div",null,{"className":"ml-4","children":[["$","a",null,{"href":"https://stackoverflow.com/users/2482744/alex-hall","target":"_blank","rel":"noopener noreferrer","className":"text-lg font-semibold text-blue-600 hover:underline","children":"Alex Hall"}],["$","p",null,{"className":"text-sm text-gray-500","children":["Reputation: ",36043]}]]}]]}],["$","p",null,{"className":"text-gray-700 mb-4","dangerouslySetInnerHTML":{"__html":"

import requests\nimport bs4\ntext = requests.get('http://example.com').text\nsoup = bs4.BeautifulSoup(text, features='html.parser')\nscripts = soup.find_all('script')\nsrcs = [link['src'] for link in scripts if 'src' in link.attrs]\nprint(srcs)\n

\n"}}],["$","div",null,{"className":"text-gray-600 text-sm","children":["$","p",null,{"children":["Upvotes: ",4]}]}]]}]]]}],["$","div",null,{"className":"bg-white shadow-md rounded-lg p-6 mt-6","children":[["$","h2",null,{"className":"text-2xl font-semibold text-gray-800 mb-4","children":"Related Questions"}],["$","ul",null,{"className":"list-disc list-inside","children":[["$","li","9694769",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/9694769","className":"text-blue-600 hover:underline","children":"How to use Python's HTMLParser to extract specific links"}]}],["$","li","50676937",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/50676937","className":"text-blue-600 hover:underline","children":"Parsing href using python"}]}],["$","li","44642452",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/44642452","className":"text-blue-600 hover:underline","children":"How to extract links from HTML (with python)"}]}],["$","li","38298995",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/38298995","className":"text-blue-600 hover:underline","children":"Extract links from html page"}]}],["$","li","6816138",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/6816138","className":"text-blue-600 hover:underline","children":"Extract absolute links from a page using HTMLParser"}]}],["$","li","27374846",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/27374846","className":"text-blue-600 hover:underline","children":"Parse specific links in html using HTMLParser in python?"}]}],["$","li","26105305",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/26105305","className":"text-blue-600 hover:underline","children":"Extracting links with regex from source code; 
Python"}]}],["$","li","24472957",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/24472957","className":"text-blue-600 hover:underline","children":"Get relative links from html page"}]}],["$","li","18455991",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/18455991","className":"text-blue-600 hover:underline","children":"Extracting links from HTML in Python"}]}],["$","li","6487018",{"className":"mb-2","children":["$","$L10",null,{"href":"/discussion/solution/6487018","className":"text-blue-600 hover:underline","children":"How to extract from html page links for javascript, css and img tags?"}]}]]}]]}]]}],["$","$L11",null,{}],["$","$L12",null,{}],["$","$L13",null,{}],["$","$L14",null,{}],["$","$L15",null,{}]]

Need to extract all links from script tag HTML Python

Answers (2)

Related Questions