Michael
Michael

Reputation: 1

String subclass for phonetic Levenshtein distance

I'm trying to create a string subclass, in order to represent Arpabet symbols as single characters. Here is what I got so far:

import Levenshtein
class ArpabetChar(str):
"""
Class that turn string into an Arpabet character.
http://www.speech.cs.cmu.edu/cgi-bin/cmudict
"""
    def __init__(self, chars: list):
        self._chars = chars

    def __repr__(self):
        return "".join(char for char in self._chars)

    def __str__(self):
        return "".join(char for char in self._chars)

    def __eq__(self, other):
        if self._chars == other._chars:
            return True
        else:
            return False

    def __len__(self):
        return len(self._chars)

    def __getitem__(self, item):
        return self._chars[item]

    def __iter__(self):
        for char in self._chars:
            yield ArpabetChar([char])

    def __add__(self, other):
        added_char = [char for char in self._chars]
        for char in other._chars:
            added_char.append(char)
        return ArpabetChar(added_char)

char1 = ArpabetChar(["AH0"])
char2 = ArpabetChar(["AH1"])
char3 = ArpabetChar(["AE1"])
print("Indexing:", char1[0])
print(f"Length of {char1}: {len(char1)}")
print(f"Length of {char2}: {len(char2)}")

print(f"Levenshtein distance {char1} and {char2}:{Levenshtein.distance(char1, char2)}")
print(f"Levenshtein distance {char1} and {char3}:{Levenshtein.distance(char1, char3)}")

The output that I expect for both calculations is a Levenshtein distance of 1. Any hints or suggestions?

Upvotes: 0

Views: 63

Answers (0)

Related Questions