user2085779
user2085779

Reputation:

Getting IO error in Python

When I try to execute one particular Python file, I get the following error:

Traceback (most recent call last):
  File "<pyshell#4>", line 1, in <module>
    g.stem(u"തുറക്കുക")
  File "/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/core.py", line 48, in stem
    self.rulesDict = self.LoadRules()
  File "/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/core.py", line 81, in LoadRules
    errors='ignore')
  File "/usr/lib/python2.7/codecs.py", line 881, in open
    file = __builtin__.open(filename, mode, buffering)
IOError: [Errno 2] No such file or directory: '/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/stemmer_ml.rules'

What is the problem here?

class Stemmer:
    """
    Rule based suffix stemmer. Instantiate the class to get the methods.

    The suffix rules are read lazily, on the first call to stem(), from
    the file ``stemmer_ml.rules`` located in the same directory as this
    module.
    """
    def __init__(self):
        # The rules file is expected to be installed next to this module.
        self.rules_file = os.path.join(os.path.dirname(__file__),
                                       'stemmer_ml.rules')
        # Filled by LoadRules() the first time stem() is called.
        self.rulesDict = None
        self.normalizer = normalizer.getInstance()

    def stem(self, text):
        """
        Stem all the space separated words in the given text.

        :param text: unicode encoded malayalam string
        :returns: dictionary with the words (after trimming and stripping
        of the punctuation ``!,.?:``) as the keys and the stemmer result
        as the values. A word without a matching suffix rule is mapped
        to itself.
        :raises IOError: on the first call, if the rules file is missing.
        """
        text = self.normalizer.normalize(text)
        if self.rulesDict is None:
            self.rulesDict = self.LoadRules()
        result_dict = dict()
        for word in text.split(" "):
            # NOTE(review): trim() is not visible in this excerpt; it is
            # assumed to be defined elsewhere on this class - confirm.
            word = self.trim(word)
            word = word.strip('!,.?:')
            # Default: the word is its own stem.
            word_stemmed = word
            # Try the longest suffix first while always keeping at least
            # the first two characters of the word untouched.
            for suffix_pos in range(2, len(word)):
                suffix = word[suffix_pos:]
                if suffix in self.rulesDict:
                    word_stemmed = word[:suffix_pos] + self.rulesDict[suffix]
                    break
            result_dict[word] = word_stemmed
        return result_dict

    def _unquote(self, token):
        """
        Remove one leading and one trailing double quote, if present.

        Safe for empty strings and for a token consisting of a single
        quote character.
        """
        if token.startswith('"'):
            token = token[1:]
        if token.endswith('"'):
            token = token[:-1]
        return token

    def LoadRules(self):
        """
        Parse the rules file into a dictionary.

        Every rule is a line of the form ``suffix = replacement``; each
        side may be wrapped in double quotes. Everything after a ``#`` is
        treated as a comment and blank lines are skipped. A line that
        does not contain exactly one ``=`` is reported on standard output
        together with its line number and is then skipped.

        :returns: dictionary with the suffixes as the keys and their
        replacements as the values.
        :raises IOError: if the rules file cannot be opened.
        """
        rules_dict = dict()
        # codecs.open() already yields unicode text; the with-block makes
        # sure the file is closed even when parsing fails.
        with codecs.open(self.rules_file, encoding='utf-8',
                         errors='ignore') as rules_file:
            for line_number, text in enumerate(rules_file, 1):
                text = text.split("#")[0]   # remove the comment part
                line = text.strip()         # remove unwanted space
                if line == "":
                    continue
                parts = line.split("=")
                if len(parts) != 2:
                    # Single-argument print() behaves the same on
                    # Python 2 and 3; the double space reproduces the
                    # output of the former two-argument print statement.
                    print("[Error] Syntax Error in the Rules. "
                          "Line number:  %d" % line_number)
                    print("Line: " + text)
                    continue
                lhs = self._unquote(parts[0].strip())
                rhs = self._unquote(parts[1].strip())
                rules_dict[lhs] = rhs
        return rules_dict

setup file

from setuptools import setup, find_packages

# Distribution name; also the name of the package that owns the rules file.
name = "indicstemmer"

setup(
    name=name,
    version="0.1",
    license="LGPL-3.0",
    description="Malayalam word stemmer",

    long_description="""This application helps you to stem the words
    in the given text. Currently supports only Malayalam.
    Note that this is very experimental and uses a rule based approach.

    """,
    packages=find_packages(),
    # include_package_data only installs data files that setuptools can
    # discover through MANIFEST.in or a version control plugin such as
    # setuptools-git, so it does nothing when installing from a plain
    # source tree.
    include_package_data=True,
    # List the rules file explicitly so that it is always installed next
    # to the modules, which is where the stemmer looks for it at runtime.
    package_data={name: ['*.rules']},
    setup_requires=['setuptools-git'],
    install_requires=['setuptools','normalizer'],
    test_suite="tests",
    zip_safe=False,
)

Test

import unittest
from indicstemmer import getInstance


class TestIndicStemmer(unittest.TestCase):
    """Test case for the stemmer object returned by getInstance()."""

    def setUp(self):
        # getInstance() is called again before every test method.
        self.instance = getInstance()

    def test_stemmer(self):
        # The result of stem() is keyed by the input word; look up the
        # stem of the single word that was passed in.
        word = u"തുറക്കുന്ന"
        expected_stem = u"തുറക്കുക"
        stems = self.instance.stem(word)
        self.assertEqual(expected_stem, stems[word])

def main():
    """Run every test of TestIndicStemmer with a verbose text runner."""
    loader = unittest.TestLoader()
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(loader.loadTestsFromTestCase(TestIndicStemmer))


# Only run the tests when this file is executed as a script.
if __name__ == "__main__":
    main()

I am using Ubuntu 12.04 desktop version

Upvotes: 1

Views: 1077

Answers (1)

sabbahillel
sabbahillel

Reputation: 4425

The significant line of the error message is

File "/usr/lib/python2.7/codecs.py", line 881, in open
    file = __builtin__.open(filename, mode, buffering)
IOError: [Errno 2] No such file or directory: '/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/stemmer_ml.rules'

This implies that the indicstemmer modules had not been installed properly because the required file stemmer_ml.rules could not be found.

Check that you do not need to set up defaults before you call g.stem() and that the permissions in the Python library directory allow you to read the rules file. Other than that, the library package may need to be reinstalled. I have noticed that if different versions of Python exist, sometimes packages get installed in the wrong version. However, I doubt it in this case because it got all the way to the rules file before crashing.

Upvotes: 1

Related Questions