count instances of duplicates in a column of a .csv file

Question

import re, csv
import os,shutil
import io,json, collections
from collections import Counter, defaultdict,deque

sn = 0  # index of the serial-number column (1st column)
p_f = 1  # index of the pass/fail column (2nd column)

# The one input file this script analyses. It is read a single time below;
# the original re-opened it for every statistic and left most handles open.
CSV_PATH = "C:/Users/gurbir.sahota/Documents/python_csv_file_program/remove_duplicates.csv"

# NOTE(review): the original also opened ".../final.csv" in "w" mode without
# ever writing to it, which truncated that file and leaked the handle. Open
# the output file (inside a `with` block) only once there are rows to write.


def read_rows(path):
    """Return every row of the CSV file at *path*, header included.

    The file is opened with ``newline=""`` as the csv module recommends and
    is closed as soon as all rows have been read.
    """
    with open(path, "r", newline="") as infh:
        return list(csv.reader(infh))


def count_serials(rows, column=sn):
    """Count how often each non-empty value occurs in *column*.

    The first row of *rows* is treated as the header and skipped. Rows that
    are blank or too short to contain *column* are ignored instead of raising
    ``IndexError``, and an empty input yields an empty ``Counter``.

    The original filter also consulted a ``seen`` set, but it stored the
    serial number itself rather than the pass/fail value, so it only dropped
    a repeat row whose pass/fail cell was equal to its serial number. Plain
    occurrence counting is what it computed for real data.
    """
    data = iter(rows)
    next(data, None)  # skip header; tolerate completely empty input
    return Counter(
        row[column]
        for row in data
        if len(row) > column and row[column]
    )


def count_matching(rows, value, column=p_f):
    """Count the rows (header included) whose *column* cell equals *value*.

    Rows too short to contain *column* are skipped rather than raising.
    """
    return sum(1 for row in rows if len(row) > column and row[column] == value)


if __name__ == "__main__":
    rows = read_rows(CSV_PATH)

    counts = count_serials(rows)
    print(counts.most_common())
    # want to count instances of the number 2 in
    # [('VFGRP15040030', 2), ('VFGRP15370118', 2), ('VFGRP15150113', 2)]

    x = len(rows)
    print('# of rows including header=')
    print(x)

    count_pass = count_matching(rows, 'pass')
    print('# of passes=')
    print(count_pass)

    count_fail = count_matching(rows, 'fail')
    print('# of fails=')
    print(count_fail)

    # count_retest = ??
    # One option (to be confirmed against the intended meaning of "retest"):
    # sum(1 for c in counts.values() if c == 2) counts the serial numbers
    # that occur exactly twice.

count instances of duplicates in a column of a .csv file

Answers (1)

Related Questions