Reputation:
When I try to execute one particular Python file, I get the following error:
Traceback (most recent call last):
File "<pyshell#4>", line 1, in <module>
g.stem(u"തുറക്കുക")
File "/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/core.py", line 48, in stem
self.rulesDict = self.LoadRules()
File "/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/core.py", line 81, in LoadRules
errors='ignore')
File "/usr/lib/python2.7/codecs.py", line 881, in open
file = __builtin__.open(filename, mode, buffering)
IOError: [Errno 2] No such file or directory: '/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/stemmer_ml.rules'
What is the problem here?
class Stemmer:
    """
    Rule-based suffix stemmer.

    Instantiate the class to get the methods. Suffix rules are read from
    the ``stemmer_ml.rules`` file located next to this module; they are
    loaded on the first call to :meth:`stem` and cached in ``rulesDict``.
    """

    def __init__(self):
        self.rules_file = os.path.join(os.path.dirname(__file__),
                                       'stemmer_ml.rules')
        # Filled in by LoadRules() the first time stem() is called.
        self.rulesDict = None
        self.normalizer = normalizer.getInstance()

    def stem(self, text):
        """
        Stem every space-separated word of ``text``.

        :param text: unicode encoded malayalam string
        :returns: dictionary with the (trimmed, punctuation-stripped) words
                  as the keys and the stemmer result as the values. A word
                  that matches no rule maps to itself.
        """
        text = self.normalizer.normalize(text)
        if self.rulesDict is None:
            self.rulesDict = self.LoadRules()
        result_dict = dict()
        for word in text.split(" "):
            # NOTE(review): trim() is not part of this listing; it is
            # expected to be provided elsewhere on the class.
            word = self.trim(word)
            word = word.strip('!,.?:')
            word_stemmed = word
            # Try the longest suffix first. Starting at index 2 keeps at
            # least the first two characters of the word untouched.
            for suffix_pos in range(2, len(word)):
                suffix = word[suffix_pos:]
                if suffix in self.rulesDict:
                    word_stemmed = word[:suffix_pos] + self.rulesDict[suffix]
                    break
            result_dict[word] = word_stemmed
        return result_dict

    @staticmethod
    def _unquote(value):
        """Remove one leading and one trailing double quote, if present."""
        if value.startswith('"'):
            value = value[1:]
        if value.endswith('"'):
            value = value[:-1]
        return value

    def LoadRules(self):
        """
        Parse ``self.rules_file`` into a suffix -> replacement dictionary.

        Each rule is a line of the form ``suffix = replacement``; either
        side may be wrapped in double quotes. Everything after a ``#`` is a
        comment. Blank lines are skipped. A line that does not contain
        exactly one ``=`` is reported on standard output and skipped.

        :returns: dictionary mapping each suffix to its replacement
        :raises IOError: if the rules file cannot be opened
        """
        rules_dict = dict()
        # The context manager guarantees the file is closed even when
        # parsing fails part-way through.
        with codecs.open(self.rules_file, encoding='utf-8',
                         errors='ignore') as rules_file:
            # enumerate() counts every physical line exactly once, so the
            # number reported below matches the file.
            for line_number, text in enumerate(rules_file, 1):
                if text[0] == '#':
                    continue  # this is a comment - ignore
                # remove the comment part of the line and unwanted space
                line = text.split("#")[0].strip()
                if line == "":
                    continue
                parts = line.split("=")
                if len(parts) != 2:
                    print("[Error] Syntax Error in the Rules. "
                          "Line number: %d" % line_number)
                    print("Line: " + text)
                    continue
                lhs = self._unquote(parts[0].strip())
                rhs = self._unquote(parts[1].strip())
                rules_dict[lhs] = rhs
        return rules_dict
setup file
# Packaging script for the indicstemmer distribution.
from setuptools import setup, find_packages

name = "indicstemmer"

setup(
    name=name,
    version="0.1",
    license="LGPL-3.0",
    description="Malayalam word stemmer",
    long_description="""This application helps you to stem the words
in the given text. Currently supports only Malayalam.
Note that this is very experimental and uses a rule based approach.
""",
    packages=find_packages(),
    # The stemmer opens stemmer_ml.rules from inside the installed package
    # at runtime. List the rule files explicitly so they are installed even
    # when the build cannot discover them through version-control metadata
    # (which is all include_package_data + setuptools-git relies on).
    package_data={name: ['*.rules']},
    include_package_data=True,
    setup_requires=['setuptools-git'],
    install_requires=['setuptools', 'normalizer'],
    test_suite="tests",
    zip_safe=False,
)
Test
import unittest
from indicstemmer import getInstance
class TestIndicStemmer(unittest.TestCase):
    """Checks the stemmer against a known Malayalam inflected form."""

    def setUp(self):
        # Obtain the stemmer under test before every test method.
        self.instance = getInstance()

    def test_stemmer(self):
        """The inflected form must map to its expected stem."""
        inflected = u"തുറക്കുന്ന"
        expected_stem = u"തുറക്കുക"
        stems = self.instance.stem(inflected)
        self.assertEqual(expected_stem, stems[inflected])
def main():
    """Load the TestIndicStemmer cases and run them with verbose output."""
    loader = unittest.TestLoader()
    suite = loader.loadTestsFromTestCase(TestIndicStemmer)
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(suite)


# Run the suite only when this file is executed as a script.
if __name__ == "__main__":
    main()
I am using Ubuntu 12.04 desktop version
Upvotes: 1
Views: 1077
Reputation: 4425
The significant line of the error message is
File "/usr/lib/python2.7/codecs.py", line 881, in open
file = __builtin__.open(filename, mode, buffering)
IOError: [Errno 2] No such file or directory: '/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/stemmer_ml.rules'
This implies that the indicstemmer modules had not been installed properly because the required file stemmer_ml.rules could not be found.
Check that you do not need to set up defaults before you call g.stem() and that the permissions in the Python library directory allow you to read the rules file. Other than that, the library package may need to be reinstalled. I have noticed that if different versions of Python exist, packages sometimes get installed under the wrong version. However, I doubt that is the case here, because execution got all the way to opening the rules file before crashing.
Upvotes: 1