toHo
toHo

Reputation: 438

Python - csvwriter empty result, but print works

I wrote a simple script to grab Unicode text from a website and want the result written to a file row by row. The code works, apart from the part that writes to the file. print(item) works perfectly fine and gives the exact result I need.

import requests
import unicodecsv, os
from bs4 import BeautifulSoup
import re

# Language codes inserted into the lesson URL built inside the loop below.
# NOTE(review): "th" is listed twice, so that language is processed twice.
countries = ["ar","th","bn","my","chin","de","es","fr","hi","ja","ko","pt","ru","th","tr","vi","zh"]

# Output file, opened in binary write mode and handed to the unicodecsv writer
# (UTF-8 output, comma delimiter, double-quote quoting).
# NOTE(review): f is never closed or flushed anywhere in the visible code.
f = open("lesson_list.txt","wb")
w = unicodecsv.writer(f, encoding='utf-8', delimiter=',', quotechar='"')

for country in countries:

    # Lesson id counter; the while loop below covers ids 1 through 100.
    toi = 1
    print country

    while toi<101:
        # Trailing comma keeps the progress counter on a single output line.
        print toi,
        url = "http://www.englishspeak.com/"+ country +"/english-lesson.cfm?lessonID=" + str(toi)
        r = requests.get(url)
        soup = BeautifulSoup(r.content)
        # NOTE(review): bare attribute access; its result is discarded.
        soup.unicode

        # Every <font> element whose color attribute is "#006633".
        titles = soup.find_all('font', {"color": "#006633"})

        # Starts as a one-element list, so only index 0 can be assigned below.
        # NOTE(review): a second matching title makes data[index] raise IndexError.
        data = [0]

        for index, item in enumerate(titles):
            # Serialize the element, then replace the font tags and line
            # breaks with spaces and collapse runs of whitespace.
            tmp = titles[index].encode("utf-8")
            replaced = re.sub(r'<font color="#006633" face="Verdana" size="4">', ' ', tmp)
            replaced = re.sub(r'\n', ' ', replaced)
            replaced = re.sub(r'\r', ' ', replaced)
            replaced = re.sub(r'</font>', ' ', replaced)
            replaced = re.sub(r'\s+', ' ', replaced)
            data[index] = replaced

        toi += 1

        for index, item in enumerate(data):
            print(item)
            # item is a single element of data (a cleaned string, or the
            # initial 0 when no title matched), not a list of fields.
            # NOTE(review): confirm the row layout writerow is meant to get.
            w.writerow(item)

Upvotes: 0

Views: 71

Answers (2)

Anzel
Anzel

Reputation: 20583

A quick look here: your list is [0], i.e. it has a single slot, so once enumerate moves past index 0 there is nowhere to store the value (data[index] = ... raises IndexError).

# Suggested rewrite: start from an empty list so it can grow with the titles.
data = []

for index, item in enumerate(titles):
    # Same clean-up chain as in the question: serialize the element, replace
    # the font tags and line breaks with spaces, collapse whitespace.
    tmp = titles[index].encode("utf-8")
    replaced = re.sub(r'<font color="#006633" face="Verdana" size="4">', ' ', tmp)
    replaced = re.sub(r'\n', ' ', replaced)
    replaced = re.sub(r'\r', ' ', replaced)
    replaced = re.sub(r'</font>', ' ', replaced)
    replaced = re.sub(r'\s+', ' ', replaced)
    data.append(replaced) # append grows the list; no index assignment needed
...
# Pass the whole list to writerow, so the cleaned titles become the fields
# of a single row.
print data # optional: shows the collected data on each loop
w.writerow(data)

Upvotes: 1

Naveed
Naveed

Reputation: 19

I guess this should serve the purpose:

import csv
import re
import requests

from bs4 import BeautifulSoup

# Language codes used as the path segment of each lesson URL.  Each code
# appears exactly once so that no language is fetched (and exported) twice.
countries = ["ar", "th", "bn", "my", "chin", "de", "es", "fr", "hi", "ja", "ko",
             "pt", "ru", "tr", "vi", "zh"]

# Default path of the CSV output file.
FILENAME = "lesson_list.txt"


def run():
    """Scrape the lesson titles of every language and export them once.

    For each code in ``countries`` the lessons with ids 1..100 are fetched and
    every ``<font color="#006633">`` element is reduced to its text.  Each
    title becomes one single-field row.  The rows of all lessons and all
    languages are accumulated and passed to ``csv_export`` exactly once at the
    end: ``csv_export`` opens the file in write mode, so exporting per
    language (or per lesson) would keep only the last batch.
    """
    # Matches any tag as well as CR/LF; compiled once instead of per title.
    markup = re.compile(r'<[^>]*>|\n|\r')
    rows = []

    for country in countries:
        for toi in range(1, 101):
            url = ("http://www.englishspeak.com/" + country +
                   "/english-lesson.cfm?lessonID=" + str(toi))
            r = requests.get(url)
            soup = BeautifulSoup(r.content)

            titles = soup.find_all('font', {"color": "#006633"})
            data = [markup.sub('', item.encode("utf-8")) for item in titles]
            print("%s item" % (data,))

            # One row per title; wrapping in a list keeps the CSV writer from
            # splitting the string into one field per character.
            rows.extend([title] for title in data)

    csv_export(rows)


def csv_export(data, file_name=FILENAME):
    """Write ``data`` to ``file_name`` as CSV, replacing previous content.

    Args:
        data: iterable of rows.  A row is normally a sequence of fields; a
            bare string is written as a one-field row instead of being split
            into one field per character.
        file_name: destination path, defaults to ``FILENAME``.
    """
    # (bytes, type(u'')) covers both byte strings and unicode strings.
    text_types = (bytes, type(u''))
    rows = [[row] if isinstance(row, text_types) else row for row in data]

    # The with-block guarantees the file is flushed and closed, even if
    # writing fails part-way through.
    with open(file_name, 'wb') as file_item:
        csv.writer(file_item).writerows(rows)

# Start the scrape only when the file is executed as a script, not on import.
if __name__ == '__main__':
    run()

Upvotes: 1

Related Questions