No3mani Mostafa
No3mani Mostafa

Reputation: 23

open new tab every loop Selenium

Hello all iam new in coding with python and i have this code for scraping data from facebook users but when i start the project every line from csv open in new tab

import requests 
from bs4 import BeautifulSoup
from selenium import webdriver
import pandas as pd
import time
def dataset():
    dff = pd.read_csv(r"names.csv")
    dff=dff[dff.name.str.contains("[a-z,A-z]")==True]
    dff=dff[dff.name.str.contains("[@,0-9,/,-]")==True]
    dname=pd.concat([dff]) 
    return dname['name']
def facebookscrap(user):
    sections = {
        'photo_url': {'src':'//div[@id="objects_container"]//a/img[@alt][1]'},
    }
    driver = webdriver.Firefox(executable_path='facebook/geckodriver')
    driver.get("https://www.facebook.com")
    element = driver.find_element_by_id("email")
    element.send_keys('email')
    element = driver.find_element_by_id("pass")
    element.send_keys('password')
    element = driver.find_element_by_id("loginbutton")
    element.click()
    h=[]
    lh=[]
    mkk=[]
    for username in [l]:
        try:
            driver.get("https://mbasic.facebook.com/" + username)
name=driver.find_element_by_xpath('/html/body/div/div/div[2]/div/div[1]/div[1]/div[2]/div[1]/span/div/span/strong')
            d = {'name': name.text}
            x = driver.find_element_by_xpath
            xs = driver.find_elements_by_xpath
            for k,v in sections.items():
                    try:
                        if 'src' in v:
                            d[str(k)] = x(v['src']).get_attribute('src')
                        elif 'txt' in v:
                            d[str(k)] = x(v['txt']).text
                        elif 'href' in v:
                            d[str(k)] = x(v['href']).get_attribute('href')[8:].split('?')[0]
                        elif 'table' in v:
                            d['details'] = []
                            rows = xs(v['table']+'td[1])')
                            for i in range (1, len(rows)+1):
                                deets_key = x(v['table']+'td[1])'+'['+str(i)+']').text
                                deets_val = x(v['table']+'td[2])'+'['+str(i)+']').text
                                d['details'].append({deets_key:deets_val})
                        elif 'workedu' in v:
                            d[str(k)] = []
                            base = v['workedu']
                            rows = xs(base)
                            for i in range (1, len(rows)+1):
                                dd = {}
                                dd['link'] = x(base+'['+str(i)+']'+'/div/div[1]//a').get_attribute('href')[8:].split('&')[0].split('/')[0]
                                dd['org'] = x(base+'['+str(i)+']'+'/div/div[1]//a').text
                                dd['lines'] = []
                                lines = xs(base+'['+str(i)+']'+'/div/div[1]/div')
                                for l in range (2, len(lines)+1):
                                    line = x(base+'['+str(i)+']'+'/div/div[1]/div'+'['+str(l)+']').text
                                    dd['lines'].append(line)
                                d[str(k)].append(dd)
                        elif 'fam' in v:
                            d[str(k)] = []
                            base = v['fam']
                            rows = xs(base)
                            for i in range (1, len(rows)+1):
                                d[str(k)].append({
                                    'name': x(base+'['+str(i)+']'+'//h3[1]').text,
                                    'rel': x(base+'['+str(i)+']'+'//h3[2]').text,
                                    'alias': x(base+'['+str(i)+']'+'//h3[1]/a').get_attribute('href')[8:].split('?')[0]
                                })
                        elif 'life_events' in k:
                            d[str(k)] = []
                            base = v['years']
                            years = xs(base)
                            for i in range (1,len(years)+1):
                                year = x(base+'['+str(i)+']'+'/div[1]').text
                                events = xs(base+'['+str(i)+']'+'/div/div/a')
                                for e in range(1,len(events)+1):
                                    event = x('('+base+'['+str(i)+']'+'/div/div/a)'+'['+str(e)+']')
                                    d[str(k)].append({
                                        'year': year,
                                        'title': event.text,
                                        'link': event.get_attribute('href')[8:].split('refid')[0]
                                    })
                    except Exception:
                        pass
            lh.append(d)
            info_str = ""
            for key in d.keys():
                    h=[]
                    info_str = info_str + key.upper()+": "
                    if type(d[key]) is list:
                        info_str += "\n"
                        for itm in d[key]:
                            if type(itm) is dict:
                                #print(itm)
                                for kff in itm.keys():
                                    info_str = info_str + "\t"+kff.upper()+": "+str(itm[kff])+"\n"
                    else:
                        info_str = info_str + d[key]+"\n"
                    h.append(info_str)
                    mkk.append(info_str)
        except:
            pass
    return(pd.DataFrame(lh))
def ifd():
    if(len(fbdb())==0):
        df['id']=[i for i in range(len(df))]
    else:
        df['id'] = [i+1 for i in range(max(fbdb().id),len(df)+max(fbdb().id))]
    return df['id']
def connectdb():
    from pymongo import MongoClient as client
    connect = client('mongodb://localhost:27017/')
    db=connect.osint
    return db
def update():   
        y= connectdb()
        fd=y['fund_facebook']
        import json
        records = json.loads(df.T.to_json()).values()
        for r in records:
            fd.insert(r)
def fbdb():
    y= connectdb()
    df=y['fund_facebook']
    k = []
    for x in df.find():
        k.append(x)
    df = pd.DataFrame(k)
    try:
        df=df.drop('_id',axis=1)
    except:
        pass
    return df
if __name__=='__main__':
    start = time.time()
    for i in dataset():
        df=facebookscrap(i)
        time.sleep(30)
        ifd()
        update()

Required Output search where is the proplem for open all links in the same tab of selenium like copy the link to the address bar and open it Where is the problem? sorry for my bad english

Upvotes: 1

Views: 330

Answers (1)

arnaud
arnaud

Reputation: 3473

Replace

for username in l

With

for username in l.split()

Indeed, "Hello world".split() equals ["Hello", "world"] while iterating directly on string "Hello world" will be like iterating on ["H", "e", "l", "l", "o", " ", "w", "o", "r", "l", "d"].

Upvotes: 1

Related Questions