user16832397
user16832397

Reputation:

how to delet specific character from a list in the loop in python

this is the part of my code to retrive data from canadian stat data according to the chanpter and save them in the excel form then open these excel form and change them and save it in the text form.

the problem for me is that in the first column I want to eleminate - from it. I tried alot but it does'nt work at all.

    # Commodity: Specific Chapter
ch_numbers = ["02","04","10","11","12","15","20","23","38"]
startrow = 0
for ch_number in ch_numbers:
    dropdown_rC = driver.find_element_by_id('report_chapters')
    dd_rC = Select(dropdown_rC)
    dd_rC.select_by_value(ch_number) 

    #  for retriving the data and store it in the excel form
    driver.find_element(By.XPATH,'//*[@id="report"]/div[1]/div[3]/div[5]/p[2]/button').click()
    time.sleep(2)
    Canada_Result=[]
    pages = len(driver.find_elements_by_xpath('//a[@class="paginate_button" or @class="paginate_button current" and @title]'))
    pages = driver.find_element_by_xpath('//a[@onclick and @class="paginate_button" or @class="paginate_button current" and @title][%d]' % (pages)).text.strip("Page\n")

    if pages == '':
        pages = 1
    
    for J in range (int(pages)):
        year_month = driver.find_elements_by_xpath('.//*[@id="report_table"]/tbody/tr["i"]/td[1]') # the year details in each row   //*[@id="report_table"]/tbody/tr[1]/td[1]
        commodities = driver.find_elements_by_xpath('.//*[@id="report_table"]/tbody/tr["i"]/td[2]/a')
        Countries = driver.find_elements_by_xpath('.//*[@id="report_table"]/tbody/tr["i"]/td[4]')
        quantities = driver.find_elements_by_xpath('.//*[@id="report_table"]/tbody/tr["i"]/td[7]')
        weights = driver.find_elements_by_xpath('.//*[@id="report_table"]/tbody/tr["i"]/td[8]/abbr')
      
        time.sleep(1)

        period_entries = len(commodities)
        for i in range(period_entries):
            temporary_data= {'year':year_month[i].text,'Commodity':commodities[i].text,'Country':Countries[i].text,'quantity':quantities[i].text,'weight': weights[i].text }
            Canada_Result.append(temporary_data)
        
        df_data = pd.DataFrame(Canada_Result)

        df_data.to_excel('Canada_scrapping_result_'+ str(ch_number) +'.xlsx', index=False, header=0)

        if J == int(pages) - 1:
            print("Done")
            break
        # click on the Next button
        driver.find_element(By.XPATH,'//*[@id="report_results_next"]').click()
        time.sleep(1)

    # ----------for convert the data in excel and save it again---------------
    df = pd.read_excel('Canada_scrapping_result_'+ str(ch_number) +'.xlsx') 

    
    



    df.to_csv('Remove_characters_'+str(ch_number)+'.txt', index=False, header=0)


    time.sleep(2)
    # Click Submit
    driver.find_element(By.XPATH,'//*[@id="report"]/div[1]/div[3]/div[5]/p[2]/button').click() # For going to the next page
    time.sleep(2)

Upvotes: 1

Views: 67

Answers (2)

Barmar
Barmar

Reputation: 781761

Use the pandas .str.replace() method.

df['year'] = df['year'].str.replace('-', '')

Upvotes: 1

Michael S.
Michael S.

Reputation: 3128

If they are string values, you can use replace in list comprehension for each row:

#### Mock dataset
data = [["2022-01-01" , "0207.14.93.00" , "United States" , 2000 , "KGM"], 
        ["2022-01-01" , "0407.14.03.00" , "United States" , 3400, "KGM"]]
df = pd.DataFrame(data, columns = ["Date", "Num1", "Country", "Num1", "Col"])
#############

df.Date = [x.replace("-", "") for x in df.Date]
df

Output:

        Date    Num1            Country         Num1    Col
0   20220101    0207.14.93.00   United States   2000    KGM
1   20220101    0407.14.03.00   United States   3400    KGM

If they are loaded in as Timestamps, then try this:

df.Date = df.Date.dt.strftime('%Y%m%d')

Upvotes: 0

Related Questions