Reputation: 27
I'm using python code to read from many csv files and set encoding to utf8.I meet the problem when I read the file I can read all lines but when I write it, it can write only 1 line. Please help me to check my code as below:
def convert_files(files, ascii, to="utf-8"):
for name in files:
#print ("Convert {0} from {1} to {2}").format(name, ascii, to)
with open(name) as f:
print(name)
count = 0
lineno = 0
#this point I want to write the below text into my each new file at the first line
#file_source.write('id;nom;prenom;nom_pere;nom_mere;prenom_pere;prenom_mere;civilite (1=homme 2=f);date_naissance;arrondissement;adresse;ville;code_postal;pays;telephone;email;civilite_demandeur (1=homme 2=f);nom_demandeur;prenom_demandeur;qualite_demandeur;type_acte;nombre_actes\n')
for line in f.readlines():
lineno +=1
if lineno == 1 :
continue
file_source = open(name, mode='w', encoding='utf-8', errors='ignore')
#pass
#print (line)
# start write data to to new file with encode
file_source.write(line)
#file_source.close
#print unicode(line, "cp866").encode("utf-8")
csv_files = find_csv_filenames('./csv', ".csv")
convert_files(csv_files, "cp866")
Upvotes: 0
Views: 1340
Reputation: 126
You can do this
def convert_files(files, ascii, to="utf-8"):
for name in files:
with open(name, 'r+') as f:
data = ''.join(f.readlines())
data.decode(ascii).encode(to)
f.seek(0)
f.write(data)
f.truncate()
Upvotes: 0
Reputation: 414245
If all you need is to change the character encoding of the files then it doesn't matter that they are csv files unless the conversion may change what characters are interpreted as delimiter, quotechar, etc:
def convert(filename, from_encoding, to_encoding):
with open(filename, newline='', encoding=from_encoding) as file:
data = file.read().encode(to_encoding)
with open(filename, 'wb') as outfile:
outfile.write(data)
for path in csv_files:
convert(path, "cp866", "utf-8")
Add errors
parameter to change how encoding/decoding errors are handled.
If files may be large then you could convert data incrementally:
import os
from shutil import copyfileobj
from tempfile import NamedTemporaryFile
def convert(filename, from_encoding, to_encoding):
with open(filename, newline='', encoding=from_encoding) as file:
with NamedTemporaryFile('w', encoding=to_encoding, newline='',
dir=os.path.dirname(filename)) as tmpfile:
copyfileobj(file, tmpfile)
tmpfile.delete = False
os.replace(tmpfile.name, filename) # rename tmpfile -> filename
for path in csv_files:
convert(path, "cp866", "utf-8")
Upvotes: 0
Reputation: 1358
You're reopening the file during every iteration.
for line in f.readlines():
lineno +=1
if lineno == 1 :
continue
#move the following line outside of the for block
file_source = open(name, mode='w', encoding='utf-8', errors='ignore')
Upvotes: 1