crysis405
crysis405

Reputation: 1131

Python - tmp file or object

The output from the first open() is the input for the second open(). I need either a temporary file or an in-memory object in which the first output is stored. Basically, I wrote these two operations separately and I am having difficulty putting them together. What happens now is that the two outputs are concatenated in one file (output1 + output2), whereas I only want output2 in the final file.

import csv
import sys
from collections import Counter


# Final output file, named by the second command-line argument. This handle
# stays open until the ofile.close() call on the last line of the script.
ofile = open(sys.argv[2], 'wb')
writer = csv.writer(ofile, delimiter='\t')

# Step 1: read tab-separated rows from the file named by the first argument
# and keep only the rows that contain at least 4 distinct field values.
with open(sys.argv[1], 'rb') as ifile:
        reader = csv.reader(ifile, delimiter='\t')

        for line in reader:
            # Counter maps each distinct field value to its occurrence count,
            # so the number of items is the number of distinct values.
            # Rows that pass the check are written straight into the final
            # output file through `writer`; this is "output1".
            freqs = Counter(line)
            if len(freqs.items()) < 4:
              continue
            else: writer.writerow(line)

# Step 2: re-open, for reading, the very path that `ofile` is still writing.
# NOTE(review): `ofile` has been neither flushed nor closed at this point, so
# this read may not see everything step 1 wrote -- confirm.
with open(sys.argv[2], 'rb') as ifile2:           
        # Parallel lists: findlist[i] is replaced by replacelist[i].
        findlist = ['A', 'G', 'C', 'T', 'Y', 'R', 'W', 'S', 'K', 'M', 'X', 'N', '-']
        replacelist = ['2', '19', '5', '29', '17', '7', '11', '13', '23', '3', '0', '0', '0']

        # Plain substring replacement over the whole file text, not per field.
        s = ifile2.read()
        for item, replacement in zip(findlist, replacelist):
            s = s.replace(item, replacement)
        # "output2" goes through the same handle that received the step 1
        # rows, so it is added after them instead of replacing them.
        ofile.write(s)

ofile.close()

Upvotes: 0

Views: 102

Answers (2)

Hugh Bothwell
Hugh Bothwell

Reputation: 56634

I've reorganized your code to hopefully make it easier to reuse:

import csv
import sys
from collections import Counter

def load_csv(fname, **kwargs):
    """Read a delimited text file and return all of its rows.

    Args:
        fname: Path of the file to read.
        **kwargs: Passed through unchanged to ``csv.reader``
            (for example ``delimiter='\\t'``).

    Returns:
        A list holding one list of string fields per row; an empty list
        for an empty file.
    """
    # The csv module expects a binary-mode file on Python 2 but a text-mode
    # file opened with newline='' on Python 3, where a binary file makes
    # csv.reader fail. Pick the mode that suits the running interpreter.
    if sys.version_info[0] >= 3:
        inf = open(fname, 'r', newline='')
    else:
        inf = open(fname, 'rb')
    with inf:
        return list(csv.reader(inf, **kwargs))

def save_csv(fname, data, header=None, **kwargs):
    """Write rows to a delimited text file, optionally after a header row.

    Args:
        fname: Path of the file to create or overwrite.
        data: Iterable of rows (each an iterable of fields); a generator
            is fine because the rows are consumed exactly once.
        header: Optional row written before ``data``; skipped when None.
        **kwargs: Passed through unchanged to ``csv.writer``
            (for example ``delimiter='\\t'``).
    """
    # The csv module expects a binary-mode file on Python 2 but a text-mode
    # file opened with newline='' on Python 3, where writing str rows to a
    # binary file raises TypeError. Pick the mode for the running interpreter.
    if sys.version_info[0] >= 3:
        outf = open(fname, 'w', newline='')
    else:
        outf = open(fname, 'wb')
    with outf:
        out_csv = csv.writer(outf, **kwargs)
        if header is not None:
            out_csv.writerow(header)
        out_csv.writerows(data)

def main(in_fname, out_fname):
    """Filter and recode a tab-separated file, saving only the final result.

    Rows with fewer than 4 distinct field values are dropped. In every
    remaining row, a field equal to one of the keys of the code table is
    replaced by its number; all other fields pass through unchanged. The
    filtered-only intermediate rows are kept in memory and never written
    to disk, so the output file holds just the recoded rows.
    """
    # Read the whole input before the output file is opened.
    rows = load_csv(in_fname, delimiter='\t')

    codes = {
        'A': '2', 'G': '19', 'C': '5', 'T': '29', 'Y': '17', 'R': '7',
        'W': '11', 'S': '13', 'K': '23', 'M': '3', 'X': '0', 'N': '0',
        '-': '0',
    }

    def recoded_rows():
        # Lazily yield the recoded version of each row that passes the filter.
        for row in rows:
            if len(Counter(row)) < 4:
                continue
            yield [codes.get(cell, cell) for cell in row]

    save_csv(out_fname, recoded_rows(), delimiter='\t')

# Script entry point: expects the input path and the output path as the
# first two command-line arguments; otherwise prints a usage hint.
if __name__ == "__main__":
    if len(sys.argv) >= 3:
        main(sys.argv[1], sys.argv[2])
    else:
        print('Usage: myprog.py input.csv output.csv')

Upvotes: 2

fuesika
fuesika

Reputation: 3330

If you don't want the data from the first file to show up in the output file, you should remove the corresponding line:

else: writer.writerow(line)

Upvotes: 0

Related Questions