Unverified Commit 55d05ac4 authored by Isis Lovecruft's avatar Isis Lovecruft
Browse files

Add Levenshtein Distance function to bridgedb.util module.

 * ADD new function, `bridgedb.util.levenshteinDistance()`.
parent f8907b32
......@@ -142,6 +142,46 @@ def configureLogging(cfg):
logging.info("Level: %s", logLevel)
logging.info("Safe Logging: %sabled" % ("En" if safelogging else "Dis"))
def levenshteinDistance(s1, s2, len1=None, len2=None,
offset1=0, offset2=0, memo=None):
"""Compute the Levenstein Distance between two strings.
The `Levenshtein String Distance Algorithm`_ efficiently computes the
number of characters which must be changed in **s1** to make it
identical to **s2**.
.. `Levenshtein String Distance Algorithm`:
https://en.wikipedia.org/wiki/Levenshtein_distance
>>> levenshteinDistance('cat', 'cat')
0
>>> levenshteinDistance('cat', 'hat')
1
>>> levenshteinDistance('arma', 'armadillo')
5
:param str s1: The string which should be changed.
:param str s2: The string which **stringOne** should be compared to.
"""
len1 = len(s1) if len1 is None else len1
len2 = len(s2) if len2 is None else len2
memo = {} if memo is None else memo
key = ','.join([str(offset1), str(len1), str(offset2), str(len2)])
if memo.get(key) is not None: return memo[key]
if len1 == 0: return len2
elif len2 == 0: return len1
cost = 0 if (s1[offset1] == s2[offset2]) else 1
distance = min(
levenshteinDistance(s1, s2, len1-1, len2, offset1+1, offset2, memo) + 1,
levenshteinDistance(s1, s2, len1, len2-1, offset1, offset2+1, memo) + 1,
levenshteinDistance(s1, s2, len1-1, len2-1, offset1+1, offset2+1, memo) + cost,
)
memo[key] = distance
return distance
class JustifiedLogFormatter(logging.Formatter):
"""A logging formatter which pretty prints thread and calling function
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment