Reputation: 1
I have two CSV files that each have two columns, id
and name
. I want to compare both files by their name
columns; if the values match, then create a new CSV file with the id
values from both files.
1.csv:
id, name
1, sofia
2, Maria
3, sofia
4, Laura
2.csv:
id, name
1, sofia
2, Laura
My code:
import csv
with open('1.csv') as companies, open('2.csv') as tags:
companies = companies.readlines()
tags = tags.readlines()
with open('CompanieTags.csv', 'w') as outFile:
for line in companies:
if line[1] != tags[1]:
line2 = companies[1]
outFile.write(line[0] and linea2)
Other code with Dict's
import csv
with open('1.csv') as companies, open('2.csv') as tags:
reader = csv.DictReader(companies)
check = csv.DictReader(tags)
with open('CompanieTags.csv', 'w') as outFile:
for x in check:
SaveTag = x['name']
for y in reader:
if SaveTag in y['name'] :
outFile.write(y['id'], x['id'])
Expected result:
id, name
1, 1
3, 1
4, 2
Upvotes: 0
Views: 174
Reputation: 23815
Another version of the answer:
- not using itertools
- loading the csv files
- using the csv files in the post
import csv
NAME = 1
ID = 0
def load_csv(file_name):
res = []
with open(file_name) as f:
reader = csv.reader(f)
for idx, row in enumerate(reader):
if idx > 0:
res.append(row)
return res
lst1 = load_csv('1.csv')
lst2 = load_csv('2.csv')
result = []
for x in lst1:
for y in lst2:
if x[NAME] == y[NAME]:
result.append((x[ID], y[ID]))
print(result)
output
[('1', '1'), ('3', '1'), ('4', '2')]
Upvotes: 0
Reputation: 23815
Here
(I skip loading the data from files to list of tuples - I assume you can do it)
import csv
from itertools import cycle
lst1 = [(1, 'Jack'), (4, 'Ben'), (5, 'Sofi')]
lst2 = [(12, 'Jack'), (4, 'Jack'), (15, 'Jack')]
names1 = {x[1] for x in lst1}
names2 = {x[1] for x in lst2}
common = names1.intersection(names2)
common_in_1 = [x[0] for x in lst1 if x[1] in common]
common_in_2 = [x[0] for x in lst2 if x[1] in common]
result = zip(common_in_1, cycle(common_in_2)) if len(common_in_1) > len(common_in_2) else zip(cycle(common_in_1),
common_in_2)
print(list(result))
# write to output file
with open('out.csv', mode='w', newline='') as f:
writer = csv.writer(f)
writer.writerows(result)
output
[(1, 12), (1, 4), (1, 15)]
Upvotes: 1