Source code for python_template.text.levenshtein

"""
This is a short file containing a levenshtein implementation.
"""


[docs]def levenshtein(seq1, seq2): """Function to compute the Levenshtein distance between two strings. Reference: https://en.wikipedia.org/wiki/Levenshtein_distance Parameters ---------- seq1 : str The first string to compare seq2 : str The second string to compare Returns ------- distance : int The Levenshtein distance between two strings Notes ----- The Levenshtein distance between two words is the minimum number of single-character edits required to change one word into the other. This is often very useful for finding approximate string matches. For example throwing an error with suggestions due to a keyword mismatch. Examples -------- Find the distance between kitten and sitting: >>> levenshtein("kitten", "sitting") 3 """ # Do some type checking if desired if not isinstance(seq1, str): raise TypeError("levenshtein: first arguement must be a string, input type %s" % type(seq1)) if not isinstance(seq2, str): raise TypeError("levenshtein: second arguement must be a string, input type %s" % type(seq2)) # Iterative "two row" levenshtein implementation oneago = None thisrow = list(range(1, len(seq2) + 1)) + [0] for x in range(len(seq1)): twoago, oneago, thisrow = oneago, thisrow, [0] * len(seq2) + [x + 1] for y in range(len(seq2)): delcost = oneago[y] + 1 addcost = thisrow[y - 1] + 1 subcost = oneago[y - 1] + (seq1[x] != seq2[y]) thisrow[y] = min(delcost, addcost, subcost) # Return return thisrow[len(seq2) - 1]