How to implement mapreduce pairs pattern in python

Question

I am trying to attempt the mapreduce pairs pattern in python. Need to check if a word is in a text file and then find the word next to it and yield a pair of both words. keep running into either:

neighbors = words[words.index(w) + 1]
ValueError: substring not found

or

 ValueError: ("the") is not in list

file cwork_trials.py

from mrjob.job import MRJob

class MRCountest(MRJob):
    # Word count
    def mapper(self, _, document):
        # Assume document is a list of words.
        #words = []
        words = document.strip()

        w = "the"
        neighbors = words.index(w)
        for word in words:
            #searchword = "the"
            #wor.append(str(word))
            #neighbors = words[words.index(w) + 1]
            yield(w,1)

    def reducer(self, w, values):
        yield(w,sum(values))

if __name__ == '__main__':
    MRCountest.run()

Edit: Trying to use the pairs pattern to search a document for every instance of a specific word and then find the word next to it each time. Then yielding a pair result for each instance i.e. find instances of "the" and the word next to it i.e. [the], [book], [the], [cat] etc.

from mrjob.job import MRJob

class MRCountest(MRJob):
# Word count
def mapper(self, _, document):
    # Assume document is a list of words.
    #words = []
    words = document.split(" ")

    want = "the"
    for w, want in enumerate(words, 1):
        if (w+1) < len(words):
            neighbors = words[w + 1]
            pair = (want, neighbors)
            for u in neighbors:
                if want is "the":
                    #pair = (want, neighbors)
                    yield(pair),1
    #neighbors = words.index(w)
    #for word in words:

        #searchword = "the"
        #wor.append(str(word))
        #neighbors = words[words.index(w) + 1]
        #yield(w,1)

#def reducer(self, w, values):
    #yield(w,sum(values))

if __name__ == '__main__':
MRCountest.run()

As it stands I get yields of every word pair with multiples of the same pairing.

How to implement mapreduce pairs pattern in python

Answers (1)

Related Questions