Reputation: 17

Search for string in CSV Files using python and write the results

   #!/usr/bin/python

import csv
import re

string_1 = ('OneTouch AT')
string_2 = ('LinkRunner AT')
string_3 = ('AirCheck')

#searched = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
print "hello Pythong! "

#def does_match(string):
#    stringl = string.lower()
#    return any(s in stringl for s in searched)

inFile  = open('data.csv', "rb")
reader = csv.reader(inFile)
outFile  = open('data2.csv', "wb")
writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)

for row in reader:
    found = False
    for col in row:
        if col in [string_1, string_2, string_3] and not found:
            writer.writerow(row)
            found = True


#for row in reader:
 #   if any(does_match(col) for col in row):
  #      writer.writerow(row[:2]) # write only 2 first columns

inFile.close()
outFile.close()

I'm trying to figure out how to search a CSV file for 3 items. If those items exist print the row. Ideally I would like only Columns 1 and 3 to print to a new file.

Sample Data File

LinkRunner AT Video,10,20
Wireless Performance Video OneTouch AT,1,2
Wired OneTouch AT,200,300
LinkRunner AT,200,300
AirCheck,200,300

Upvotes: 1

Answers (3)

Burhan Khalid

Reputation: 174758

I'm trying to figure out how to search a CSV file for 3 items. If those items exist print the row. Ideally I would like only Columns 1 and 3 to print to a new file.

Try this:

import csv

search_for = ['OneTouch AT','LinkRunner AT','AirCheck']

with open('in.csv') as inf, open('out.csv','w') as outf:
    reader = csv.reader(inf)
    writer = csv.writer(outf, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for row in reader:
        if row[0] in search_for:
            print('Found: {}'.format(row))
            writer.writerow(row)

Upvotes: 3

Kris

Reputation: 17

#!/usr/bin/python

import csv
import re
import sys
import gdata.docs.service


#string_1 = ('OneTouch AT')
#string_2 = ('LinkRunner AT')
#string_3 = ('AirCheck')

searched = ['aircheck', 'linkrunner at', 'onetouch at']

def find_group(row):
    """Return the group index of a row
        0 if the row contains searched[0]
        1 if the row contains searched[1]
        etc
        -1 if not found
    """
    for col in row:
        col = col.lower()
        for j, s in enumerate(searched):
            if s in col:
                return j
        return -1

def does_match(string):
    stringl = string.lower()
    return any(s in stringl for s in searched)

#Opens Input file for read and output file to write.
inFile  = open('data.csv', "rb")
reader = csv.reader(inFile)
outFile  = open('data2.csv', "wb")
writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)


#for row in reader:
#   found = False
#   for col in row:
#       if col in [string_1, string_2, string_3] and not found:
#           writer.writerow(row)
#           found = True



"""Built a list of items to sort. If row 12 contains 'LinkRunner AT' (group 1),
    one stores a triple (1, 12, row)
    When the triples are sorted later, all rows in group 0 will come first, then
    all rows in group 1, etc.
"""
stored = []
for i, row in enumerate(reader):
    g = find_group(row)
    if g >= 0:
        stored.append((g, i, row))
stored.sort()

for g, i, row in stored:
    writer.writerow(tuple(row[k] for k in (0,2))) # output col 1 & 5

#for row in reader:
 #   if any(does_match(col) for col in row):
  #      writer.writerow(row[:2]) # write only 2 first columns

# Closing Input and Output files.
inFile.close()
outFile.close()

Upvotes: 0

Ryder Brooks

Reputation: 2119

#!/usr/bin/python

import csv
import numpy as np

class search_csv(object):
    def __init__(self, infile, outfile):
        infile = open(infile, 'rb')
        read_infile = [i for i in csv.reader(infile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)]
        self.non_numpy_data = read_infile
        self.data = np.array(read_infile, dtype=None)
        self.outfile = open(outfile, 'wb')
        self.writer_ = csv.writer(self.outfile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    def write_to(self, matched_values):
        self.writer_.writerows(matched_values)
        print ' Matched Values Written '
        return True

    def searcher(self, items, return_cols=[0,2]): ##// items should be passed as list -> ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
        find_these = np.array(items, dtype=None)
        matching_y = np.in1d(self.data, find_these).reshape(self.data.shape).nonzero()[0]
        matching_data = self.data[matching_y][:,return_cols]
        self.write_to(matching_data)
        self.outfile.close()
        return True

    def non_numpy_search(self, items, return_cols=[0,2]):
        lst = []
        for i in self.non_numpy_data:
            for ii in items:
                if ii in i:
                    z = []
                    for idx in return_cols:
                        z.append(i[idx])
                    lst.append(z)
                break
        self.write_to(lst)
        return True


### now use the class ###

SEARCHING_FOR = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']

IN_FILE = 'in_file.csv'
OUT_FILE = 'out_file.csv'

non_numpy_search(IN_FILE, OUT_FILE).non_numpy_search(SEARCHING_FOR)

By the phrasing of your question I'm assuming you just want to complete the task at hand and don't really care how. So copy and paste this in and use your data file as the 'IN_FILE' value and the file name you want to write to as the 'OUT_FILE' value. Place the values you want to search for in the 'SEARCHING_FOR' list as you're done.

Things to note.... SEARCHING_FOR should be a list.

the values in SEARCHING_FOR are matched EXACTLY so 'A' will not match 'a'. If you want a to use a regex or something more complex let me know.

In function 'non_numpy_search' there is a 'return_cols' parameter. It defaults to the first and 3rd column.

If you don't have numpy let me know.

Upvotes: 0

Search for string in CSV Files using python and write the results

Answers (3)

Related Questions