Reputation: 7679
I have the following file:
chr11_pilon3.g3568.t1 transcript:OIS96097 82.2 168 30
chr11_pilon3.g3568.t2 transcript:OIS96097 82.2 169 30
gene.100079.0.5.p3 transcript:OIS96097 82.2 169 30
gene.100079.0.3.p1 transcript:OIS96097 82.2 169 30
gene.100079.0.0.p1 transcript:OIS96097 82.2 169 35
gene.100080.0.3.p1 transcript:OIS96097 82.2 169 40
gene.100080.0.0.p1 transcript:OIS96097 82.2 169 40
and I get the following output:
chr11_pilon3.g3568.t1 transcript:OIS96097 82.2 168 30
chr11_pilon3.g3568.t2 transcript:OIS96097 82.2 169 30
gene.100079.0.0.p1 transcript:OIS96097 82.2 169 35
gene.100080.0.3.p1 transcript:OIS96097 82.2 169 40
gene.100080.0.0.p1 transcript:OIS96097 82.2 169 40
I just wonder whether there are ways to simplify the code below in terms of how it deals with the end of the file:
try:
    lineParts = line.rstrip().split('\t')
except IndexError:
    continue
and
if dataStorage: # check if Dict is **not** empty
    output(tmp_id, dataStorage, out_fn)
?
from collections import defaultdict

def output(tmp_id, dataDict, out_fn):
    for isoformID in dataDict.keys():
        mx = max(dataDict[isoformID], key=lambda x: int(x[4]))[4]
        mx_values = [d for d in dataDict[isoformID] if d[4] == mx]
        for mx_value in mx_values:
            out_fn.write('\t'.join(mx_value) + '\n')
        del dataDict[isoformID]
    dataDict[tmp_id].append(lineParts)
    return dataDict

dataStorage = defaultdict(list)
with open("data/datap-bigcov.tsv") as data_fn, open("data/datap-bigcov-out.tsv", 'w') as out_fn:
    for line in data_fn:
        try:
            lineParts = line.rstrip().split('\t')
        except IndexError:
            continue
        if lineParts[0].startswith("gene"):
            split_id = lineParts[0].split('.')
            tmp_id = split_id[0] + "." + split_id[1]
        else:
            tmp_id = lineParts[0]
        if not dataStorage: # check if Dict is empty
            dataStorage[tmp_id].append(lineParts)
        elif tmp_id in dataStorage:
            dataStorage[tmp_id].append(lineParts)
        else:
            dataStorage = output(tmp_id, dataStorage, out_fn)

    if dataStorage: # check if Dict is **not** empty
        output(tmp_id, dataStorage, out_fn)
Upvotes: 0
Views: 50
Reputation: 31389
You're already dealing with the end of the file here:
for line in data_fn:
    ...
This automatically stops after the last line is read. However, it appears you want to stop when you encounter the first empty line. A very straightforward way would be:
for line in data_fn:
    line = line.rstrip()
    if not line:
        break
    ...
Or, if you don't like using/abusing the fact that '' evaluates to False:
for line in data_fn:
    if line == '\n':
        break
    ...
Another way to approach it would be to simply skip empty lines (allowing for empty lines in the middle of the file as well):
for line in data_fn:
    line = line.rstrip()
    if line:
        ...
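For what it's worth, combined with the splitting from your own code that pattern could look like this (just a sketch, reusing the filename and tab-separated layout from the question; the try/except is dropped since blank lines are filtered out up front):

with open("data/datap-bigcov.tsv") as data_fn:
    for line in data_fn:
        line = line.rstrip()
        if not line:
            continue  # blank lines anywhere in the file are simply ignored
        lineParts = line.split('\t')
        # ... process lineParts as in your original loop ...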
By the way, you probably don't want to call a variable referencing a file data_fn, since the name suggests this is the name of the file, but it's actually the reference to the file. So data_f, just f, or data would all be more appropriate names.
You asked a follow-up question in the comments: "Thank you, how would you replace this condition to guarantee that the last id get written to file: if blastStorage: output(tmp_id, blastStorage, out_fn)" - I could try to answer that question, but I feel that's going down a rabbit hole with still more questions. There are quite a few programming and style mistakes in your code, and it would go a bit too far to address all of them individually.
Your overall problem is that your code is a bit more convoluted than it needs to be. Often, if you've solved a problem once, it helps to step back and decide if you now understand the problem well enough to write a better (and simpler) solution.
The problem you're solving: you have an input file of records; each record has an associated identifier (the first two parts of the first column split over periods); you want to create an output file with, for each identifier in the input file, all the records where some value is equal to the maximum of that value over all records with that identifier. You appear to want to process the file in a single pass (a simpler, but perhaps slower, solution could do a double pass; a sketch of that is given after the single-pass version below).
Your code seems to suggest you expect records with matching identifiers to be consecutive, so you don't have to keep data around for later matches, and from the way you deal with the maximum value, it appears you want to write out every record that ties for that maximum, not just the first one.
A working (complete, and far simpler) solution, using those implied assumptions, would be:
with open("data/datap-bigcov.tsv") as f_in, open("data/datap-bigcov-out.tsv", 'w') as f_out:
    current = ('', 0)
    max_lines = []
    for line in f_in:
        line_parts = line.rstrip().split()
        rec_id = '.'.join(line_parts[0].split('.')[:2])
        value = int(line_parts[4])
        # id and value match, just add it
        if (rec_id, value) == current:
            max_lines.append(line)
        else:
            # id doesn't match
            if rec_id != current[0]:
                # whatever was collected so far should be written first
                f_out.writelines(max_lines)
            # id matches, but the value is smaller than the current max
            elif value < current[1]:
                # not a new maximum, just skip it
                continue
            # in both other cases, rec_id and value are the values we're after for now
            current = (rec_id, value)
            # start a new set with the greater max
            max_lines = [line]
    f_out.writelines(max_lines)
The statement f_out.writelines(max_lines) is in there twice, to ensure the final id also gets written to file, but I think the overall code is so much simpler that the duplicate call isn't really a concern. Also note that the first time it gets called, it's called with an empty max_lines, but that does what's expected (nothing) and it avoids having to check whether it's empty.
Only 17 lines of Python, without sacrificing clarity or speed.
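For completeness, the double pass mentioned earlier could look roughly like this (just a sketch under the same assumptions about the file layout; it reads the file twice, so it doesn't need matching identifiers to be consecutive, but it will be slower on large files):

from collections import defaultdict

def parse(line):
    # split a data line into its grouping identifier and its numeric value
    parts = line.rstrip().split('\t')
    rec_id = '.'.join(parts[0].split('.')[:2])
    return rec_id, int(parts[4])

# first pass: find the maximum value for each identifier
max_per_id = defaultdict(int)
with open("data/datap-bigcov.tsv") as f_in:
    for line in f_in:
        if line.strip():
            rec_id, value = parse(line)
            max_per_id[rec_id] = max(max_per_id[rec_id], value)

# second pass: write every record whose value ties its identifier's maximum
with open("data/datap-bigcov.tsv") as f_in, open("data/datap-bigcov-out.tsv", 'w') as f_out:
    for line in f_in:
        if line.strip():
            rec_id, value = parse(line)
            if value == max_per_id[rec_id]:
                f_out.write(line)

It trades the bookkeeping of current and max_lines for a second read of the file.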
And then a final note: this type of stuff is generally dealt with very well by the pandas library. Here's a solution to your problem in only a few lines using pandas:
from pandas import read_csv
df = read_csv('data/datap-bigcov.tsv', sep='\t', header=None)
df['id'] = df.apply(lambda rec: '.'.join(rec[0].split('.')[:2]), axis=1)
result = df[df[4] == df.groupby(by='id')[4].transform('max')]
result.to_csv('data/datap-bigcov-out.tsv', sep='\t', header=None)
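The key step is the transform('max'): it computes the maximum of column 4 per id and broadcasts that maximum back onto every row of its group, so comparing it against column 4 gives a boolean mask that keeps exactly the rows tying their group's maximum.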
I feel that using pandas does sacrifice clarity (at least it requires a fairly good understanding of pandas to follow the code), but it performs far better on large datasets and it's pretty much the industry standard, so it's worth learning.
Upvotes: 1