Mr Anonymous
Mr Anonymous

Reputation: 41

Invalid URL '': No schema supplied. Perhaps you meant http://?

I want to code a Python 3 script that splits URLs and checks each response for a string using the requests module.

But some of the URLs are redirected by a meta refresh tag, and I want Python to follow those links. Another problem is that when I use a for loop to send requests to the URLs, I receive

<Invalid URL '': No schema supplied. Perhaps you meant http://?> ERROR

If you look at the code below you'll understand better what I mean.

        fopen2 = open("clean url.txt", "r")
    splurl = fopen2.read().split('\n')
    urlcln = []
    urlcln2 = []
    print(splurl)
    for i in splurl:
        getthis = requests.get(i)
        parserres = BeautifulSoup(getthis.text, 'html.parser')
        print(parserres)
        if "<title>" in str(parserres):
            print('yes')
        else:
            print('no')

This is the full source code...

import time
import sys
import re
from tqdm.auto import tqdm
import requests
from bs4 import BeautifulSoup
# from colorama import Fore, Back, Style

try:

    def finish(elapsed=None):
        """Print the closing banner and the elapsed processing time.

        elapsed: seconds to report. When None (the default, so existing
        ``finish()`` call sites keep working) it falls back to the
        module-level ``toc - tic`` globals set by the cleaning branches.
        """
        print("""

        ==============================

        Duplicated URLs Remover With it Fathers!

        Developed Version ~ 1.0.3 By Ehsan Abafat
         _____ _                          _    _            __       _
        | ____| |__  ___  __ _ _ __      / \  | |__   __ _ / _| __ _| |_
        |  _| | '_ \/ __|/ _` | '_ \    / _ \ | '_ \ / _` | |_ / _` | __|
        | |___| | | \__ \ (_| | | | |  / ___ \| |_) | (_| |  _| (_| | |_
        |_____|_| |_|___/\__,_|_| |_| /_/   \_\_.__/ \__,_|_|  \__,_|\__|

        =============================

        clean URLs successfully!

        time that spent for this process:

        """)
        if elapsed is None:
            # Backward-compatible path: read the timing globals the caller set.
            elapsed = toc - tic
        print(elapsed, "S")

    if sys.argv[1] == '-h':
        print(100*"*")
        print('''

            => ~ Usage : Put dirty URLs in "old url.txt" file and call python3 source.py -?

                    -s : Fully Clean URLs
                    -d : Clean URLs with keeping "/"
                    -f : Clean Duplicated URLs Example : 1 2 3 2 => 1 2 3

           => ~ Developed Version ~ 1.0.3 By Ehsan Abafat

        ''')
        print(100*"*")
    elif sys.argv[1] == '-s':

        tic = time.time()
        f = open("old url.txt", "r")
        flisted = f.read().lower().replace('https', 'http').replace(
            'http://', '').replace('www.', '')
        SmartRemover = re.sub("/(\w+)?", "", flisted).split('\n')
        listurl = []
        dupurl = ["\n"]
        fullclean = []
        print('\n Getting Lines... \n')
        for i in tqdm(SmartRemover):
            if i in listurl:
                dupurl.append(i.strip())
            else:
                listurl.append(i.strip())
        print('\n Cleaning... \n')
        for i in tqdm(listurl):
            if i not in dupurl:
                fullclean.append(i)
        f.close()

        flast = open("clean url.txt", "w")
        for i in fullclean:
            if(i != '\n' and i != '\s' and i != '' and len(i) > 2):
                flast.write('http://'+str(i)+'\n')
        toc = time.time()
        finish()
        flast.close()
    elif sys.argv[1] == '-d':

        tic = time.time()
        f = open("old url.txt", "r")
        flisted = f.read().lower().replace('https', 'http').replace(
            'http://', '').replace('www.', '').split()
        listurl = []
        dupurl = ["\n"]
        fullclean = []
        print('\n Getting Lines... \n')
        for i in tqdm(flisted):
            if i in listurl:
                dupurl.append(i.strip())
            else:
                listurl.append(i.strip())
        print('\n Cleaning... \n')
        for i in tqdm(listurl):
            if i not in dupurl:
                fullclean.append(i)
        f.close()

        flast = open("clean url.txt", "w")
        for i in fullclean:
            if(i != '\n' and i != '\s' and i != '' and len(i) > 2):
                flast.write('http://'+str(i)+'\n')
        toc = time.time()
        finish()
        flast.close()
    elif sys.argv[1] == '-f':

        tic = time.time()
        f = open("old url.txt", "r")
        flisted = f.read().lower().replace('https', 'http').replace(
            'http://', '').replace('www.', '').split()
        listurl = []
        dupurl = ["\n"]
        fullclean = []
        print('\n Getting Lines... \n')
        for i in tqdm(flisted):
            if i in listurl:
                dupurl.append(i.strip())
            else:
                listurl.append(i.strip())
        print('\n Cleaning... \n')
        flast = open("clean url.txt", "w")
        for i in tqdm(listurl):
            print(i)
            if(i != '\n' and i != '\s' and i != '' and len(i) > 2):
                flast.write('http://'+str(i)+'\n')
        f.close()

        toc = time.time()
        finish()
        flast.close()
    else:
        print('unknown command! use python3 source.py -h')

    if len(sys.argv) == 2:
        pass
    elif len(sys.argv) == 3:
        telerikuiVul = '{ "message" : "RadAsyncUpload handler is registered succesfully, however, it may not be accessed directly." }'
        telerikBugCheckADR = "/Telerik.Web.UI.WebResource.axd?type=rau"
        fopen2 = open("clean url.txt", "r")
        splurl = fopen2.read().split('\n')
        urlcln = []
        urlcln2 = []
        print(splurl)
        for i in splurl:
            getthis = requests.get(i)
            parserres = BeautifulSoup(getthis.text, 'html.parser')
            print(parserres)
            if "<title>" in str(parserres):
                print('yes')
            else:
                print('no')
    else:
        if(sys.argv[1] != '-h'):
            print("use '<Python3 source.py -h>' command")
        else:
            print('You are see Usage of This Script!')
except Exception as e:
    print(e)

Upvotes: 2

Views: 14476

Answers (1)

Mladen Milosavljevic
Mladen Milosavljevic

Reputation: 1810

You don't have "http://" in your URL list. www.address.com should be http://www.address.com.

This should fix it

for i in splurl:
    # Guard against the trailing '' that str.split('\n') produces — it is
    # what triggers "Invalid URL '': No schema supplied".
    if not i.strip():
        continue
    # Only prepend a scheme when the entry does not already carry one,
    # otherwise entries like "http://example.com" would become
    # "http://http://example.com".
    url = i if i.startswith("http") else "http://" + i
    getthis = requests.get(url)
    parserres = BeautifulSoup(getthis.text, 'html.parser')
    print(parserres)
    if "<title>" in str(parserres):
        print('yes')
    else:
        print('no')

Upvotes: 3

Related Questions