Reputation: 13
I have a Python script that processes a data file:
# Copy every well-formed record from the raw result file into the per-run
# output file, below a fixed CSV header. `name` is defined earlier in the
# complete script.
# Both files are managed by `with` so they are flushed and closed even if an
# error occurs part-way through.
with open('result/process/'+name+'.res','w') as out:
    out.write("source,rssi,lqi,packetId,run,counter\n")
    with open('result/resultat0.res','r') as f:
        # Iterate the file lazily; iterating a file never yields '', so no
        # extra filtering (or in-memory copy of the whole file) is needed.
        for ligne in f:
            chaine = ligne.rstrip('\n')
            tmp = chaine.split(',')
            # Keep only records made of exactly six comma-separated fields.
            if len(tmp) == 6:
                out.write(','.join(tmp)+"\n")
The complete code is here
I use this script on several computers and the behavior is not the same. On the first computer, with Python 2.6.6, the result is what I expect. However, on the others (Python 2.6.6, 3.3.2, 2.7.5) the write method of the file object writes null bytes instead of the values I want for most of the processing. I get this result:
$ hexdump -C result/process/1.res
00000000 73 6f 75 72 63 65 2c 72 73 73 69 2c 6c 71 69 2c |source,rssi,lqi,|
00000010 70 61 63 6b 65 74 49 64 2c 72 75 6e 2c 63 6f 75 |packetId,run,cou|
00000020 6e 74 65 72 0a 00 00 00 00 00 00 00 00 00 00 00 |nter............|
00000030 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
0003a130 00 00 00 00 00 00 00 00 00 00 31 33 2c 36 35 2c |..........13,65,|
0003a140 31 34 2c 38 2c 39 38 2c 31 33 31 34 32 0a 31 32 |14,8,98,13142.12|
0003a150 2c 34 37 2c 31 37 2c 38 2c 39 38 2c 31 33 31 34 |,47,17,8,98,1314|
0003a160 33 0a 33 2c 34 35 2c 31 38 2c 38 2c 39 38 2c 31 |3.3,45,18,8,98,1|
0003a170 33 31 34 34 0a 31 31 2c 38 2c 32 33 2c 38 2c 39 |3144.11,8,23,8,9|
0003a180 38 2c 31 33 31 34 35 0a 39 2c 32 30 2c 32 32 2c |8,13145.9,20,22,|
Do you have any idea how to resolve this problem?
Upvotes: 0
Views: 1357
Reputation: 38247
With the following considerations:
- Avoid `global`. Pass arguments to functions instead.

Here's an (untested) attempt at refactoring your code for sanity; it assumes that you have enough memory available to hold all of the lines under a particular identifier.
If you have null bytes in your result files after this refactoring then we have reasonable basis to proceed with debugging.
import os
import re
from contextlib import closing
def list_files_to_process(directory='results'):
    """
    Collect the paths of the regular files directly inside directory whose
    name ends with '.res' (compared case-insensitively).

    Paths are built with os.path.join(directory, name) and returned in the
    order os.listdir() reports the entries.
    """
    def is_result_file(entry):
        # Must be a plain file (not a sub-directory) with a .res extension.
        return (os.path.isfile(os.path.join(directory, entry))
                and entry.lower().endswith('.res'))

    return [os.path.join(directory, entry)
            for entry in os.listdir(directory)
            if is_result_file(entry)]
def group_lines(sequence):
    """
    Generator: split a sequence of lines into batches delimited by separator
    lines of the form 'A:<digits>:'.

    Trailing line terminators are stripped from every line and blank lines
    are skipped. Yields (batch_id, lines) tuples, where batch_id is the digit
    string captured from the separator that opened the batch (None for lines
    seen before the first separator) and lines is the list of stripped lines
    collected until the next separator or the end of the input. A separator
    immediately followed by another separator, or by the end of the input,
    yields a batch with an empty list.
    """
    separator = re.compile(r'^A:(?P<id>\d+):$')
    batch = []
    batch_id = None
    for line in sequence:
        # Lines read from a file keep their terminator; drop it so that blank
        # lines are really detected and callers get clean content.
        line = line.rstrip('\r\n')
        if not line:  # Ignore blanks
            continue
        m = separator.match(line)
        if m is not None:
            # A new separator closes the batch collected so far (if any).
            if batch_id is not None or batch:
                yield (batch_id, batch)
            batch_id = m.group('id')
            batch = []
        else:
            batch.append(line)
    # Flush the last batch once the input is exhausted.
    if batch_id is not None or batch:
        yield (batch_id, batch)
def filename_for_results(batch_id,result_directory):
    """
    Build the path of the result file for batch_id: a file named
    'results-<batch_id>.res' located inside result_directory.
    """
    basename = "results-%s.res" % (batch_id,)
    return os.path.join(result_directory, basename)
def open_result_file(filename,header="source,rssi,lqi,packetId,run,counter"):
    """
    Open filename for appending and return the file object.

    When the file is missing or has a size of zero, the header line
    (followed by a newline) is written before the file object is returned;
    otherwise the existing content is left as it is.
    """
    # Decide before opening: opening in append mode creates a missing file.
    already_has_content = os.path.exists(filename) and os.path.getsize(filename) > 0
    handle = open(filename, 'a')
    if not already_has_content:
        handle.write(header + '\n')
    return handle
def process_file(filename,result_directory='results/processed'):
    """
    Open filename and process its contents. Uses group_lines() to group
    lines into batches based upon specific lines acting as content
    separators, and appends each batch to its own result file.

    A data line is accepted when it starts with 'L', ends with 'E' and
    contains exactly five commas; the payload between the two markers is
    written to the batch's result file. Anything else, as well as batches
    without lines, is reported in the 'error' result file.

    result_directory is created if it does not exist yet.
    """
    # Opening the output files fails if the directory is missing.
    if not os.path.isdir(result_directory):
        os.makedirs(result_directory)
    error_filename = filename_for_results('error',result_directory)
    # NOTE: the error file is opened in 'w' mode, so it only ever holds the
    # errors of the most recently processed input file.
    with open(filename,'r') as in_file, open(error_filename,'w') as error_out:
        for batch_id, lines in group_lines(in_file):
            if not lines:
                error_out.write("Received batch %r with 0 lines\n" % (batch_id,))
                continue
            out_filename = filename_for_results(batch_id,result_directory)
            with closing(open_result_file(out_filename)) as out_file:
                for line in lines:
                    # Lines read from a file keep their terminator, which
                    # would defeat the endswith('E') check below.
                    line = line.rstrip('\r\n')
                    if line.startswith('L') and line.endswith('E') and line.count(',') == 5:
                        # Drop exactly one leading 'L' and one trailing 'E'.
                        out_file.write(line[1:-1] + '\n')
                    else:
                        error_out.write("Unknown line, batch=%r: %r\n" %(batch_id,line))
if __name__ == '__main__':
    # Process every '.res' file found in the default input directory.
    for filename in list_files_to_process():
        # A parenthesised single-argument print works both as the Python 2
        # print statement and as the Python 3 print function.
        print("Processing %s" % (filename,))
        process_file(filename)
Upvotes: 2