Reputation: 11
import re, csv
import os,shutil
import io,json, collections
from collections import Counter, defaultdict,deque
# Summarise a pass/fail test log stored as CSV.
#
# Prints, in order:
#   * per-serial-number counts of distinct results (most common first),
#   * the total number of rows (header included),
#   * the number of rows whose result column is 'pass',
#   * the number of rows whose result column is 'fail'.
sn=0 #1st column: serial number
p_f=1 #2nd column: result ('pass' / 'fail')

INPUT_CSV = "C:/Users/gurbir.sahota/Documents/python_csv_file_program/remove_duplicates.csv"
OUTPUT_CSV = "C:/Users/gurbir.sahota/Documents/python_csv_file_program/final.csv"

# Read the input exactly once and close it deterministically. The original
# opened the same file six times and never closed most of the handles
# (`g.close` without parentheses is a no-op attribute lookup).
# newline='' is what the csv module expects so quoted line breaks parse correctly.
with open(INPUT_CSV, "r", newline="") as infh:
    rows = list(csv.reader(infh))

# The original script created/truncated final.csv but never wrote to it.
# Keep that side effect, but make sure the handle is closed. Any future
# f.writerow(...) calls belong inside this `with` block.
with open(OUTPUT_CSV, "w", newline="") as outfh:
    f = csv.writer(outfh)

# For every serial number, count how many *distinct* results it has.
# `seen` maps serial number -> set of results already counted for it.
# (The original added the serial number itself to the set, so the
# membership test on the result column could never filter anything.)
seen = defaultdict(set)
counts = Counter()
for row in rows[1:]:  # rows[0] is the header; an empty file simply yields nothing
    if len(row) <= max(sn, p_f) or not row[sn]:
        continue  # blank/short line or missing serial number
    if row[p_f] not in seen[row[sn]]:
        seen[row[sn]].add(row[p_f])
        counts[row[sn]] += 1
print(counts.most_common())
#want to count instances of the number 2 in [('VFGRP15040030', 2), ('VFGRP15370118', 2), ('VFGRP15150113', 2)]

x = len(rows)
print('# of rows including header=')
print(x)

# Short rows are skipped instead of raising IndexError.
count_pass = sum(1 for row in rows if len(row) > p_f and row[p_f] == 'pass')
print('# of passes=')
print(count_pass)

count_fail = sum(1 for row in rows if len(row) > p_f and row[p_f] == 'fail')
print('# of fails=')
print(count_fail)
# TODO: count_retest is still undefined; presumably it is the number of
# serials whose entry in `counts` equals 2 -- confirm before implementing.
Upvotes: 0
Views: 607
Reputation: 5658
# to get duplicates and their frequency for a column
import csv
from collections import Counter
from operator import itemgetter

# here we take as example column number 1
COLUMN = 1

with open('data.csv', 'r', newline='') as f:
    r = csv.reader(f)
    # Skip blank/short rows so itemgetter cannot raise IndexError on them.
    long_enough = (row for row in r if len(row) > COLUMN)
    # Counter must be built while the file is still open: the reader is lazy.
    cn = Counter(map(itemgetter(COLUMN), long_enough))

# print item that appears more than once in the column
for k, v in cn.items():
    if v > 1:
        print(k,v)
Upvotes: 1