# Algorithm Implementation/Strings/Levenshtein distance

The implementations of the Levenshtein algorithm on this page are illustrative only. Applications will, in most cases, use implementations which use heap allocations sparingly, in particular when large lists of words are compared to each other. The following remarks indicate some of the variations on this and related topics:

• Most implementations use one- or two-dimensional arrays to store the distances of prefixes of the words compared. In most applications the size of these structures is previously known. This is the case, when, for instance the distance is relevant only if it is below a certain maximally allowed distance (this happens when words are selected from a dictionary to approximately match a given word). In this case the arrays can be preallocated and reused over the various runs of the algorithm over successive words.
• Using a maximum allowed distance puts an upper bound on the search time. The search can be stopped as soon as the minimum Levenshtein distance between prefixes of the strings exceeds the maximum allowed distance.
• Deletion, insertion, and replacement of characters can be assigned different weights. The usual choice is to set all three weights to 1. Different values for these weights allows for more flexible search strategies in lists of words.

## Action Script 3

function levenshtein(s1:String, s2:String):uint
{
const len1:uint = s1.length, len2:uint = s2.length;
var d:Vector.<Vector.<uint> >=new Vector.<Vector.<uint> >(len1+1);
for(i=0; i<=len1; ++i)
d[i] = new Vector.<uint>(len2+1);

d[0][0]=0;

var i:int;
var j:int;

for(i=1; i<=len1; ++i) d[i][0]=i; //int faster than uint
for(i=1; i<=len2; ++i) d[0][i]=i;

for(i = 1; i <= len1; ++i)
for(j = 1; j <= len2; ++j)
d[i][j] = Math.min( Math.min(d[i - 1][j] + 1,d[i][j - 1] + 1),
d[i - 1][j - 1] + (s1.charAt(i - 1) == s2.charAt(j - 1) ? 0 : 1) );
return(d[len1][len2]);

}


function Levenshtein(Left, Right : String) return Natural is
D : array(0 .. Left'Last, 0 .. Right'Last) of Natural;
begin
for I in D'range(1) loop D(I, 0) := I;end loop;

for J in D'range(2) loop D(0, J) := J;end loop;

for I in Left'range loop
for J in Right'range loop
D(I, J) := Natural'Min(D(I - 1, J), D(I, J - 1)) + 1;
D(I, J) := Natural'Min(D(I, J), D(I - 1, J - 1) + Boolean'Pos(Left(I) /= Right(J)));
end loop;
end loop;

return D(D'Last(1), D'Last(2));
end Levenshtein;


## AWK

function levenshtein(str1, str2, cost_ins, cost_rep, cost_del,    str1_len, str2_len, matrix, is_match, i, j, x, y, z) {
if(cost_ins == "") cost_ins = 1
if(cost_rep == "") cost_rep = 1
if(cost_del == "") cost_del = 1
str1_len = length(str1)
str2_len = length(str2)
if(str1_len == 0) return str2_len * cost_ins
if(str2_len == 0) return str1_len * cost_del
matrix[0, 0] = 0
for(i = 1; i <= str1_len; i++) {
matrix[i, 0] = i * cost_del
for(j = 1; j <= str2_len; j++) {
matrix[0, j] = j * cost_ins
x = matrix[i - 1, j] + cost_del
y = matrix[i, j - 1] + cost_ins
z = matrix[i - 1, j - 1] + (substr(str1, i, 1) == substr(str2, j, 1) ? 0 : cost_rep)
x = x < y ? x : y
matrix[i, j] = x < z ? x : z
}
}
return matrix[str1_len, str2_len]
}


## Bash

#!/bin/bash
function levenshtein {
if (( $# != 2 )); then echo "Usage:$0 word1 word2" >&2
elif (( ${#1} <${#2} )); then
levenshtein "$2" "$1"
else
local str1len=${#1} local str2len=${#2}
local d

for (( i = 0; i <= (str1len+1)*(str2len+1); i++ )); do
d[i]=0
done

for (( i = 0; i <= str1len; i++ )); do
d[i+0*str1len]=$i done for (( j = 0; j <= str2len; j++ )); do d[0+j*(str1len+1)]=$j
done

for (( j = 1; j <= str2len; j++ )); do
for (( i = 1; i <= str1len; i++ )); do
[ "${1:i-1:1}" = "${2:j-1:1}" ] && local cost=0 || local cost=1
del=$(( d[(i-1)+str1len*j]+1 )) ins=$(( d[i+str1len*(j-1)]+1 ))
alt=$(( d[(i-1)+str1len*(j-1)]+cost )) d[i+str1len*j]=$( echo -e "$del\n$ins\n$alt" | sort -n | head -1 ) done done echo${d[str1len+str1len*(str2len)]}
fi
}


## Common Lisp

(defun levenshtein-distance (str1 str2)
"Calculates the Levenshtein distance between str1 and str2, returns an editing distance (int)."
(let ((n (length str1))
(m (length str2)))
;; Check trivial cases
(cond ((= 0 n) (return-from levenshtein-distance m))
((= 0 m) (return-from levenshtein-distance n)))
(let ((col (make-array (1+ m) :element-type 'integer))
(prev-col (make-array (1+ m) :element-type 'integer)))
;; We need to store only two columns---the current one that
;; is being built and the previous one
(dotimes (i (1+ m))
(setf (svref prev-col i) i))
;; Loop across all chars of each string
(dotimes (i n)
(setf (svref col 0) (1+ i))
(dotimes (j m)
(setf (svref col (1+ j))
(min (1+ (svref col j))
(1+ (svref prev-col (1+ j)))
(+ (svref prev-col j)
(if (char-equal (schar str1 i) (schar str2 j)) 0 1)))))
(rotatef col prev-col))
(svref prev-col m))))


## C

int min(int a, int b, int c)
{
if(a <= b && a <= c)
{
return a;
}
else if(b <= a && b <= c)
{
return b;
}
else if(c <= a && c <= b)
{
return c;
}
}

int levenshtein(char *s1, char *s2) {
unsigned int x, y, s1len, s2len;
s1len = strlen(s1);
s2len = strlen(s2);
unsigned int matrix[s2len+1][s1len+1];
matrix[0][0] = 0;
for (x = 1; x <= s2len; x++)
matrix[x][0] = matrix[x-1][0] + 1;
for (y = 1; y <= s1len; y++)
matrix[0][y] = matrix[0][y-1] + 1;
for (x = 1; x <= s2len; x++)
for (y = 1; y <= s1len; y++)
matrix[x][y] = min(matrix[x-1][y] + 1, matrix[x][y-1] + 1, matrix[x-1][y-1] + (s1[y-1] == s2[x-1] ? 0 : 1));

return(matrix[s2len][s1len]);
}


The above can be optimized to use O(min(m,n)) space instead of O(mn). The key observation is that we only need to access the contents of the previous column when filling the matrix column-by-column. Hence, we can re-use a single column over and over, overwriting its contents as we proceed.

#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))

int levenshtein(char *s1, char *s2) {
unsigned int s1len, s2len, x, y, lastdiag, olddiag;
s1len = strlen(s1);
s2len = strlen(s2);
unsigned int column[s1len + 1];
for (y = 1; y <= s1len; y++)
column[y] = y;
for (x = 1; x <= s2len; x++) {
column[0] = x;
for (y = 1, lastdiag = x - 1; y <= s1len; y++) {
olddiag = column[y];
column[y] = MIN3(column[y] + 1, column[y - 1] + 1, lastdiag + (s1[y-1] == s2[x - 1] ? 0 : 1));
lastdiag = olddiag;
}
}
return column[s1len];
}


## C++

unsigned int edit_distance(const std::string& s1, const std::string& s2)
{
const std::size_t len1 = s1.size(), len2 = s2.size();
std::vector<std::vector<unsigned int>> d(len1 + 1, std::vector<unsigned int>(len2 + 1));

d[0][0] = 0;
for(unsigned int i = 1; i <= len1; ++i) d[i][0] = i;
for(unsigned int i = 1; i <= len2; ++i) d[0][i] = i;

for(unsigned int i = 1; i <= len1; ++i)
for(unsigned int j = 1; j <= len2; ++j)
// note that std::min({arg1, arg2, arg3}) works only in C++11,
// for C++98 use std::min(std::min(arg1, arg2), arg3)
d[i][j] = std::min({ d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + (s1[i - 1] == s2[j - 1] ? 0 : 1) });
return d[len1][len2];
}


vector<vector<>>


isn't efficient here, since you don't need a 2 dimensional array.

Here's another implementation with usage of only ${\displaystyle \sim ~min(m,n)}$  memory (also it uses fact that ${\displaystyle {lev}_{a,b}(i-1,j-1)\leq lev_{a,b}(i,j)}$ , so it is not needed to take minimum out of 3 possibilities):

#include <algorithm>
#include <vector>

template<typename T>
typename T::size_type LevenshteinDistance(const T &source, const T &target) {
if (source.size() > target.size()) {
return LevenshteinDistance(target, source);
}

using TSizeType = typename T::size_type;
const TSizeType min_size = source.size(), max_size = target.size();
std::vector<TSizeType> lev_dist(min_size + 1);

for (TSizeType i = 0; i <= min_size; ++i) {
lev_dist[i] = i;
}

for (TSizeType j = 1; j <= max_size; ++j) {
TSizeType previous_diagonal = lev_dist[0], previous_diagonal_save;
++lev_dist[0];

for (TSizeType i = 1; i <= min_size; ++i) {
previous_diagonal_save = lev_dist[i];
if (source[i - 1] == target[j - 1]) {
lev_dist[i] = previous_diagonal;
} else {
lev_dist[i] = std::min(std::min(lev_dist[i - 1], lev_dist[i]), previous_diagonal) + 1;
}
previous_diagonal = previous_diagonal_save;
}
}

return lev_dist[min_size];
}


Here is implementation of generalized Levenshtein distance with different costs of insertion, deletion and replacement:

#include <algorithm>
#include <vector>

template<typename T>
typename T::size_type GeneralizedLevenshteinDistance(const T &source,
const T &target,
typename T::size_type insert_cost = 1,
typename T::size_type delete_cost = 1,
typename T::size_type replace_cost = 1) {
if (source.size() > target.size()) {
return GeneralizedLevenshteinDistance(target, source, delete_cost, insert_cost, replace_cost);
}

using TSizeType = typename T::size_type;
const TSizeType min_size = source.size(), max_size = target.size();
std::vector<TSizeType> lev_dist(min_size + 1);

lev_dist[0] = 0;
for (TSizeType i = 1; i <= min_size; ++i) {
lev_dist[i] = lev_dist[i - 1] + delete_cost;
}

for (TSizeType j = 1; j <= max_size; ++j) {
TSizeType previous_diagonal = lev_dist[0], previous_diagonal_save;
lev_dist[0] += insert_cost;

for (TSizeType i = 1; i <= min_size; ++i) {
previous_diagonal_save = lev_dist[i];
if (source[i - 1] == target[j - 1]) {
lev_dist[i] = previous_diagonal;
} else {
lev_dist[i] = std::min(std::min(lev_dist[i - 1] + delete_cost, lev_dist[i] + insert_cost), previous_diagonal + replace_cost);
}
previous_diagonal = previous_diagonal_save;
}
}

return lev_dist[min_size];
}


## C#

        private int levenshtein(string a, string b)
{

if (string.IsNullOrEmpty(a))
{
if (!string.IsNullOrEmpty(b))
{
return b.Length;
}
return 0;
}

if (string.IsNullOrEmpty(b))
{
if (!string.IsNullOrEmpty(a))
{
return a.Length;
}
return 0;
}

int cost;
int[,] d = new int[a.Length + 1, b.Length + 1];
int min1;
int min2;
int min3;

for (int i = 0; i <= d.GetUpperBound(0); i += 1)
{
d[i, 0] = i;
}

for (int i = 0; i <= d.GetUpperBound(1); i += 1)
{
d[0, i] = i;
}

for (int i = 1; i <= d.GetUpperBound(0); i += 1)
{
for (int j = 1; j <= d.GetUpperBound(1); j += 1)
{
cost = (a[i-1] != b[j-1])? 1 : 0;

min1 = d[i - 1, j] + 1;
min2 = d[i, j - 1] + 1;
min3 = d[i - 1, j - 1] + cost;
d[i, j] = Math.Min(Math.Min(min1, min2), min3);
}
}

return d[d.GetUpperBound(0), d.GetUpperBound(1)];

}


An implementation with reduced memory usage

public int LevenshteinDistance(string source, string target){
if(string.IsNullOrEmpty(source)){
if(string.IsNullOrEmpty(target)) return 0;
return target.Length;
}
if(string.IsNullOrEmpty(target)) return source.Length;

if(source.Length > target.Length){
var temp = target;
target = source;
source = temp;
}

var m = target.Length;
var n = source.Length;
var distance = new int[2, m + 1];
// Initialize the distance matrix
for(var j = 1; j <= m; j++) distance[0, j] = j;

var currentRow = 0;
for(var i = 1; i <= n; ++i){
currentRow = i & 1;
distance[currentRow, 0] = i;
var previousRow = currentRow ^ 1;
for(var j = 1; j <= m; j++) {
var cost = (target[j - 1] == source[i - 1] ? 0 : 1);
distance[currentRow, j] = Math.Min(Math.Min(
distance[previousRow, j] + 1,
distance[currentRow, j - 1] + 1),
distance[previousRow, j - 1] + cost);
}
}
return distance[currentRow, m];
}


Damerau-Levenshtein distance is computed in the asymptotic time O ((max + 1) * min (first.length (), second.length ()))

    /// <summary>
/// Damerau-Levenshtein distance
/// </summary>
public class DamerauLevensteinMetric
{
private const int DEFAULT_LENGTH = 255;
private int[] _currentRow;
private int[] _previousRow;
private int[] _transpositionRow;

public DamerauLevensteinMetric()
: this(DEFAULT_LENGTH)
{
}

/// <summary>
///
/// </summary>
/// <param name="maxLength"></param>
public DamerauLevensteinMetric(int maxLength)
{
_currentRow = new int[maxLength + 1];
_previousRow = new int[maxLength + 1];
_transpositionRow = new int[maxLength + 1];
}

/// <summary>
/// Damerau-Levenshtein distance is computed in asymptotic time O((max + 1) * min(first.length(), second.length()))
/// </summary>
/// <param name="first"></param>
/// <param name="second"></param>
/// <param name="max"></param>
/// <returns></returns>
public int GetDistance(string first, string second, int max)
{
int firstLength = first.Length;
int secondLength = second.Length;

if (firstLength == 0)
return secondLength;

if (secondLength == 0) return firstLength;

if (firstLength > secondLength)
{
string tmp = first;
first = second;
second = tmp;
firstLength = secondLength;
secondLength = second.Length;
}

if (max < 0) max = secondLength;
if (secondLength - firstLength > max) return max + 1;

if (firstLength > _currentRow.Length)
{
_currentRow = new int[firstLength + 1];
_previousRow = new int[firstLength + 1];
_transpositionRow = new int[firstLength + 1];
}

for (int i = 0; i <= firstLength; i++)
_previousRow[i] = i;

char lastSecondCh = (char) 0;
for (int i = 1; i <= secondLength; i++)
{
char secondCh = second[i - 1];
_currentRow[0] = i;

// Compute only diagonal stripe of width 2 * (max + 1)
int from = Math.Max(i - max - 1, 1);
int to = Math.Min(i + max + 1, firstLength);

char lastFirstCh = (char) 0;
for (int j = from; j <= to; j++)
{
char firstCh = first[j - 1];

// Compute minimal cost of state change to current state from previous states of deletion, insertion and swapping
int cost = firstCh == secondCh ? 0 : 1;
int value = Math.Min(Math.Min(_currentRow[j - 1] + 1, _previousRow[j] + 1), _previousRow[j - 1] + cost);

// If there was transposition, take in account its cost
if (firstCh == lastSecondCh && secondCh == lastFirstCh)
value = Math.Min(value, _transpositionRow[j - 2] + cost);

_currentRow[j] = value;
lastFirstCh = firstCh;
}
lastSecondCh = secondCh;

int[] tempRow = _transpositionRow;
_transpositionRow = _previousRow;
_previousRow = _currentRow;
_currentRow = tempRow;
}

return _previousRow[firstLength];
}
}


## Clojure

(defn nextelt
"Given two characters, the previous row, and a row we are
building, determine out the next element for this row."
[char1 char2 prevrow thisrow position]
(if (= char1 char2)
(prevrow (- position 1))
(+ 1 (min
(prevrow (- position 1))
(prevrow position)
(last thisrow))))
)

(defn nextrow
"Based on the next character from string1 and the whole of string2
calculate the next row. Initially thisrow contains one number."
[char1 str2 prevrow thisrow]
(let [char2 (first str2)
position (count thisrow)]
(if (= (count thisrow) (count prevrow))
thisrow
(recur
char1
(rest str2)
prevrow
(conj thisrow (nextelt char1 char2 prevrow thisrow position))))))

(defn levenshtein
"Calculate the Levenshtein distance between two strings."
([str1 str2]
(let [row0 (vec (map first (map vector (iterate inc 1) str2)))]
(levenshtein 1 (vec (cons 0 row0)) str1 str2)))
([row-nr prevrow str1 str2]
(let [next-row (nextrow (first str1) str2 prevrow (vector row-nr))
str1-remainder (.substring str1 1)]
(if (= "" str1-remainder)
(last next-row)
(recur (inc row-nr) next-row str1-remainder str2))))
)


Another implementation using transient data structure, inspired by Common Lisp version:

(defn levenshtein [str1 str2]
"a Clojure levenshtein implementation using transient data structure"
(let [n (count str1) m (count str2)]
(cond
(= 0 n) m
(= 0 m) n
:else
(let [prev-col (transient (vec (range (inc m)))) col (transient [])] ; initialization for the first column.
(dotimes [i n]
(assoc! col 0 (inc i)) ; update col[0]
(dotimes [j m]
(assoc! col (inc j)  ; update col[1..m]
(min (inc (nth col j))
(inc (nth prev-col (inc j)))
(+ (get prev-col j) (if (= (nth str1 i) (nth str2 j)) 0 1)))))
(dotimes [i (count prev-col)]
(assoc! prev-col i (get col i)))) ;
(last (persistent! col)))))) ; last element of last column


## D

import std.string;
import std.algorithm; // for min, already defines levenshteinDistance in library

pure nothrow size_t damerauDistance(const string source, const string target) {
immutable auto sourceLength = source.length;
immutable auto targetLength = target.length;
auto distance = new size_t[][](sourceLength + 1, targetLength+1);
for (auto iSource = 0; iSource <= sourceLength; ++iSource) {
distance[iSource][0] = iSource;
}
for (auto iTarget = 1; iTarget <= targetLength; ++iTarget) {
distance[0][iTarget] = iTarget;
}

for (auto iSource = 1; iSource <= sourceLength; ++iSource) {
for (auto iTarget = 1; iTarget <= targetLength; ++iTarget) {
auto delete1 =  distance[iSource - 1][iTarget] + 1;
auto insert1 =  distance[iSource][iTarget - 1] + 1;
size_t marginalCost = source[iSource - 1] == target[iTarget - 1] ? 0 : 1;
auto substitute = distance[iSource - 1][iTarget - 1] + marginalCost;
distance[iSource][iTarget] = min(delete1, insert1, substitute);
if (iSource > 1 && iTarget > 1 &&
source[iSource - 1] == target[iTarget - 2] &&
source[iSource - 2] == target[iTarget-1]) {
distance[iSource][iTarget] =
min(distance[iSource][iTarget], distance[iSource - 2][iTarget - 2] + marginalCost);
}
}
}
return distance[sourceLength][targetLength];
}


## Delphi

A simple implementation that can certainly be improved upon.

function Levenshtein(Word1, Word2: String): integer;
var lev : array of array of integer;
i,j : integer;
begin
// Word1 := LowerCase(Word1);
// Word2 := LowerCase(Word2);

// If the words are identical, do nothing
if Word1 = Word2 then
begin
result := 0;
exit;
end;

SetLength(lev, length(Word1) + 1);
for i := low(lev) to high(lev) do
setLength(lev[i], length(Word2) + 1);

for i := low(lev) to high(lev) do lev[i][0] := i;
for j := low(lev[low(lev)]) to high(lev[low(lev)]) do lev[0][j] := j;

for i := low(lev)+1 to high(lev) do
for j := low(lev[i])+1 to high(lev[i]) do
lev[i][j] := min(min(lev[i-1][j] + 1,lev[i][j-1] + 1)
,lev[i-1][j-1] + ifthen(Word1[i] = Word2[j], 0, 1));

result := lev[length(word1)][length(word2)];
end;


## Eiffel

	distance (a, b: STRING): INTEGER
-- Compute Levenshtein distance between strings a' and b'.
note
synopsis: "[
Example: Calculate distance between "sitting" and "kitten":

k	i	t	t	e	n
0	1	2	3	4	5	6 <-- set top row to [0 .. m]
s	1	1	2	3	4	5	6
i	2	2	1	2	3	4	5
t	3	3	2	1	2	3	4
t	4	4	3	2	1	2	3
i	5	5	4	3	2	2	3
n	6	6	5	4	3	3	2
g	7	7	6	5	4	4	3! <-- Answer
^
|
set first column to [0 .. n]

Compute top row and left-most columns first. Then go cell-by-cell
(e.g. 1 .. n over 1 .. m), row-by-row, column-by-column, computing:

d[i-1, j] + 1		-- a deletion
d[i, j-1] + 1		-- an insertion
d[i-1, j-1] + 1		-- a substitution

Taking the minimum of each and putting it in its proper place in
the d[i, j] array, unless the char's are the same (e.g. a [i - 1] = b [j - 1])
]"
language_note: "[
Eiffel is a 1-based array system and not 0-based, so do not
read the array element accesses from the 0-based mindset.
]"
local
d: ARRAY2 [INTEGER]
i, j, m, n: INTEGER
do
m := a.count + 1; n := b.count + 1
create d.make_filled (0, m, n)
from i := 0 until i = m loop d [i + 1, 1] := i; i := i + 1 end
from j := 0 until j = n loop d [1, j + 1] := j; j := j + 1 end
from j := 2 until j > n loop
from i := 2 until i > m loop
if a [i - 1] = b [j - 1] then
d[i, j] := d[i-1, j-1]
else
d[i, j] := (d[i-1, j-1] + 1).min (d[i, j-1] + 1).min (d[i-1, j] + 1)
end
i := i + 1
end
j := j + 1
end
Result := d[m, n]
end


## Emacs Lisp

 (defun levenshtein-distance (str1 str2)
"Return the edit distance between strings STR1 and STR2."
(if (not (stringp str1))
(error "Argument was not a string: %s" str1))
(if (not (stringp str2))
(error "Argument was not a string: %s" str2))
(let* ((make-table (function (lambda (columns rows init)
(make-vector rows (make-vector columns init)))))
(tref (function (lambda (table x y)
(aref (aref table y) x))))
(tset (function (lambda (table x y object)
(let ((row (copy-sequence (aref table y))))
(aset row x object)
(aset table y row) object))))
(length-str1 (length str1))
(length-str2 (length str2))
(d (funcall make-table (1+ length-str1) (1+ length-str2) 0)))
(let ((i 0) (j 0))
(while (<= i length-str1)
(funcall tset d i 0 i)
(setq i (1+ i)))
(while (<= j length-str2)
(funcall tset d 0 j j)
(setq j (1+ j))))
(let ((i 1))
(while (<= i length-str1)
(let ((j 1))
(while (<= j length-str2)
(let* ((cost (if (equal (aref str1 (1- i)) (aref str2 (1- j)))
0
1))
(deletion (1+ (funcall tref d (1- i) j)))
(insertion (1+ (funcall tref d i (1- j))))
(substitution (+ (funcall tref d (1- i) (1- j)) cost)))
(funcall tset d i j (min insertion deletion substitution)))
(setq j (1+ j))))
(setq i (1+ i))))
(funcall tref d length-str1 length-str2)))


## Erlang

levenshtein(A, []) -> length(A);
levenshtein([], B) -> length(B);
levenshtein([A | TA] = AA, [B | TB] = BA) ->
lists:min([levenshtein(TA, BA) + 1,
levenshtein(AA, TB) + 1,
levenshtein(TA, TB) + delta(A, B)]).

delta(_A, _A) -> 0;
delta(_A, _B) -> 1.


## F#

The inlined min function gives a big speed boost.

let inline min3 one two three =
if one < two && one < three then one
elif two < three then two
else three

let wagnerFischer (s: string) (t: string) =
let m = s.Length
let n = t.Length
let d = Array2D.create (m + 1) (n + 1) 0

for i = 0 to m do d.[i, 0] <- i
for j = 0 to n do d.[0, j] <- j

for j = 1 to n do
for i = 1 to m do
if s.[i-1] = t.[j-1] then
d.[i, j] <- d.[i-1, j-1]
else
d.[i, j] <-
min3
(d.[i-1, j  ] + 1) // a deletion
(d.[i  , j-1] + 1) // an insertion
(d.[i-1, j-1] + 1) // a substitution
d.[m,n]


and here's a slightly faster lazy version.

let wagnerFischerLazy (s: string) (t: string) =
let m = s.Length
let n = t.Length
let d = Array2D.create (m+1) (n+1) -1
let rec dist =
function
| i, 0 -> i
| 0, j -> j
| i, j when d.[i,j] <> -1 -> d.[i,j]
| i, j ->
let dval =
if s.[i-1] = t.[j-1] then dist (i-1, j-1)
else
min3
(dist (i-1, j)   + 1) // a deletion
(dist (i,   j-1) + 1) // an insertion
(dist (i-1, j-1) + 1) // a substitution
d.[i, j] <- dval; dval
dist (m, n)


## Go

This version uses dynamic programming with time complexity of ${\displaystyle O(mn)}$  where ${\displaystyle m}$  and ${\displaystyle n}$  are lengths of ${\displaystyle a}$  and ${\displaystyle b}$ , and the space complexity is ${\displaystyle n+1}$  of integers plus some constant space(i.e. ${\displaystyle O(n)}$ ).

import "unicode/utf8"
func Levenshtein(a, b string) int {
f := make([]int, utf8.RuneCountInString(b)+1)

for j := range f {
f[j] = j
}

for _, ca := range a {
j := 1
fj1 := f[0] // fj1 is the value of f[j - 1] in last iteration
f[0]++
for _, cb := range b {
mn := min(f[j]+1, f[j-1]+1) // delete & insert
if cb != ca {
mn = min(mn, fj1+1) // change
} else {
mn = min(mn, fj1) // matched
}

fj1, f[j] = f[j], mn // save f[j] to fj1(j is about to increase), update f[j] to mn
j++
}
}

return f[len(f)-1]
}
func min(a, b int) int {
if a <= b {
return a
} else {
return b
}
}


## Groovy

This version is based on the Java version below

class Levenshtein {
def static int distance(String str1, String str2) {
def str1_len = str1.length()
def str2_len = str2.length()
int[][] distance = new int[str1_len + 1][str2_len + 1]
(str1_len + 1).times { distance[it][0] = it }
(str2_len + 1).times { distance[0][it] = it }
(1..str1_len).each { i ->
(1..str2_len).each { j ->
distance[i][j] = [distance[i-1][j]+1, distance[i][j-1]+1, str1[i-1]==str2[j-1]?distance[i-1][j-1]:distance[i-1][j-1]+1].min()
}
}
distance[str1_len][str2_len]
}
}


Tested with GHCi.

levenshtein::String->String->Int
levenshtein s t =
d!!(length s)!!(length t)
where d = [[distance m n|n<-[0..length t]]|m<-[0..length s]]
distance i 0 = i
distance 0 j = j
distance i j = minimum [d!!(i-1)!!j+1, d!!i!!(j-1)+1, d!!(i-1)!!(j-1) + (if s!!(i-1)==t!!(j-1) then 0 else 1)]


and here's a slightly faster version.

levenshtein::String->String->Int
levenshtein s t =
d!!(length s)!!(length t)
where d = [[distance m n|n<-[0..length t]]|m<-[0..length s]]
distance i 0 = i
distance 0 j = j
distance i j = if s!!(i-1)==t!!(j-1) then d!!(i-1)!!(j-1) else
1 + minimum [d!!(i-1)!!j, d!!i!!(j-1), d!!(i-1)!!(j-1)]


For large strings, using arrays is much faster

import Data.Array.Unboxed
import Data.Array.ST

for_ xs f =  mapM_ f xs

levenshtein :: [Char] -> [Char] -> Int
levenshtein s t = d ! (ls , lt)
where s' = array (0,ls) [ (i,x) | (i,x) <- zip [0..] s ]::UArray Int Char
t' = array (0,lt) [ (i,x) | (i,x) <- zip [0..] t ]::UArray Int Char
ls = length s
lt = length t
(l,h) = ((0,0),(length s,length t))
d = runSTUArray $do m <- newArray (l,h) 0 :: ST s (STUArray s (Int,Int) Int) for_ [0..ls]$ \i -> writeArray m (i,0) i
for_ [0..lt] $\j -> writeArray m (0,j) j for_ [1..lt]$ \j -> do
for_ [1..ls] $\i -> do let c = if s'!(i-1)==t'! (j-1) then 0 else 1 x <- readArray m (i-1,j) y <- readArray m (i,j-1) z <- readArray m (i-1,j-1) writeArray m (i,j)$ minimum [x+1, y+1, z+c ]
return m


As recursively defined array:

import Array
toArray l = listArray (0, length l - 1) l                  -- makes an Array from a List
mkArray f bnds = array bnds [ (i, f i) | i <- range bnds ] -- defines an Array over its indices

levenshtein sa sb = table
where
arrA = toArray sa
arrB = toArray sb
table = mkArray f ((0,0), (length sa , length sb))
f (ia, 0) = ia
f (0 ,ib) = ib
f (ia,ib)
| a == b    = table ! (ia-1,ib-1)
| otherwise = 1 + minimum [ table ! x | x <- [(ia-1,ib-1),(ia-1,ib),(ia,ib-1)] ]
where
a = arrA ! (ia - 1)
b = arrB ! (ib - 1)


And finally: fast but cryptic implementation

levenshtein2 sa sb = last $foldl transform [0..length sa] sb where transform xs@(x:xs') c = scanl compute (x+1) (zip3 sa xs xs') where compute z (c', x, y) = minimum [y+1, z+1, x + fromEnum (c' /= c)]  ## Io  Levenshtein := method(left, right, if(right size < left size, return Levenshtein(right, left)) current := 0 to(left size) asList right foreach(i, righti, previous := current current = List clone with(i + 1) left foreach(j, leftj, current append((current at(j) + 1) min(previous at(j + 1) + 1) min(previous at(j) + if(leftj == righti, 0, 1))) ) ) current last )  ## JavaScript Slow recursive version: function levenshteinDistance (s, t) { if (s.length === 0) return t.length; if (t.length === 0) return s.length; return Math.min( levenshteinDistance(s.substr(1), t) + 1, levenshteinDistance(t.substr(1), s) + 1, levenshteinDistance(s.substr(1), t.substr(1)) + (s[0] !== t[0] ? 1 : 0) ); }  Faster approach using dynamic programming // Compute the edit distance between the two given strings function getEditDistance(a, b) { if (a.length === 0) return b.length; if (b.length === 0) return a.length; var matrix = []; // increment along the first column of each row var i; for (i = 0; i <= b.length; i++) { matrix[i] = [i]; } // increment each column in the first row var j; for (j = 0; j <= a.length; j++) { matrix[0][j] = j; } // Fill in the rest of the matrix for (i = 1; i <= b.length; i++) { for (j = 1; j <= a.length; j++) { if (b.charAt(i-1) == a.charAt(j-1)) { matrix[i][j] = matrix[i-1][j-1]; } else { matrix[i][j] = Math.min(matrix[i-1][j-1] + 1, // substitution Math.min(matrix[i][j-1] + 1, // insertion matrix[i-1][j] + 1)); // deletion } } } return matrix[b.length][a.length]; };  ## Java public class LevenshteinDistance { private static int minimum(int a, int b, int c) { return Math.min(Math.min(a, b), c); } public static int computeLevenshteinDistance(CharSequence lhs, CharSequence rhs) { int[][] distance = new int[lhs.length() + 1][rhs.length() + 1]; for (int i = 0; i <= lhs.length(); i++) distance[i][0] = i; for (int j = 1; j <= rhs.length(); j++) distance[0][j] = j; for (int i = 1; i <= lhs.length(); i++) for (int j = 1; j <= rhs.length(); j++) distance[i][j] = minimum( distance[i - 1][j] + 1, distance[i][j - 1] + 1, distance[i - 1][j - 1] + ((lhs.charAt(i - 1) == rhs.charAt(j - 1)) ? 0 : 1)); return distance[lhs.length()][rhs.length()]; } }  Not recursive and faster public int levenshteinDistance (CharSequence lhs, CharSequence rhs) { int len0 = lhs.length() + 1; int len1 = rhs.length() + 1; // the array of distances int[] cost = new int[len0]; int[] newcost = new int[len0]; // initial cost of skipping prefix in String s0 for (int i = 0; i < len0; i++) cost[i] = i; // dynamically computing the array of distances // transformation cost for each letter in s1 for (int j = 1; j < len1; j++) { // initial cost of skipping prefix in String s1 newcost[0] = j; // transformation cost for each letter in s0 for(int i = 1; i < len0; i++) { // matching current letters in both strings int match = (lhs.charAt(i - 1) == rhs.charAt(j - 1)) ? 0 : 1; // computing cost for each transformation int cost_replace = cost[i - 1] + match; int cost_insert = cost[i] + 1; int cost_delete = newcost[i - 1] + 1; // keep minimum cost newcost[i] = Math.min(Math.min(cost_insert, cost_delete), cost_replace); } // swap cost/newcost arrays int[] swap = cost; cost = newcost; newcost = swap; } // the distance is the cost for transforming all letters in both strings return cost[len0 - 1]; }  ## Julia Straightforward recursive implementation. Do not use for large strings! function levenshtein(lhs::String, rhs::String, lhsi::Int = length(lhs), rhsi::Int = length(rhs)) if lhsi == 0 return rhsi end if rhsi == 0 return lhsi end cost = lhs[lhsi] == rhs[rhsi] ? 0 : 1 min( levenshtein(lhs, rhs, lhsi - 1, rhsi) + 1, levenshtein(lhs, rhs, lhsi, rhsi - 1) + 1, levenshtein(lhs, rhs, lhsi - 1, rhsi - 1) + cost ) end  Test with @show levenshtein("aaa", "ab") @show levenshtein("kitten", "sitting")  ## Kotlin fun levenshtein(lhs : CharSequence, rhs : CharSequence) : Int { val lhsLength = lhs.length val rhsLength = rhs.length var cost = IntArray(lhsLength + 1) { it } var newCost = IntArray(lhsLength + 1) { 0 } for (i in 1..rhsLength) { newCost[0] = i for (j in 1..lhsLength) { val editCost= if(lhs[j - 1] == rhs[i - 1]) 0 else 1 val costReplace = cost[j - 1] + editCost val costInsert = cost[j] + 1 val costDelete = newCost[j - 1] + 1 newCost[j] = minOf(costInsert, costDelete, costReplace) } val swap = cost cost = newCost newCost = swap } return cost[lhsLength] }  ## MySQL This implementation seems to be broken; but I am not entirely sure how to fix it. DROP FUNCTION IF EXISTS levenshtein; CREATE FUNCTION levenshtein(s1 VARCHAR(255) CHARACTER SET utf8, s2 VARCHAR(255) CHARACTER SET utf8) RETURNS TINYINT UNSIGNED NO SQL DETERMINISTIC BEGIN DECLARE s1_len, s2_len, i, j, c, c_temp TINYINT UNSIGNED; -- max strlen=255 for this function DECLARE cv0, cv1 VARBINARY(256); -- if any param is NULL return NULL -- (consistent with builtin functions) IF (s1 + s2) IS NULL THEN RETURN NULL; END IF; SET s1_len = CHAR_LENGTH(s1), s2_len = CHAR_LENGTH(s2), cv1 = 0x00, j = 1, i = 1, c = 0; -- if any string is empty, -- distance is the length of the other one IF (s1 = s2) THEN RETURN 0; ELSEIF (s1_len = 0) THEN RETURN s2_len; ELSEIF (s2_len = 0) THEN RETURN s1_len; END IF; WHILE (j <= s2_len) DO SET cv1 = CONCAT(cv1, CHAR(j)), j = j + 1; END WHILE; WHILE (i <= s1_len) DO SET c = i, cv0 = CHAR(i), j = 1; -- What for syntax of WHILE is that?? MYSQL do not now. Please help. WHILE (j <= c_temp) THEN SET c = c_temp; END IF; SET c_temp = ORD(SUBSTRING(cv1, j+1, 1)) + 1; IF (c > c_temp) THEN SET c = c_temp; END IF; SET cv0 = CONCAT(cv0, CHAR(c)), j = j + 1; END WHILE; SET cv1 = cv0, i = i + 1; END WHILE; RETURN c; END;  The implementation below, thanks to Jason Rust at Code Janitor, seems to be more complete. CREATE FUNCTION levenshtein( s1 VARCHAR(255), s2 VARCHAR(255) ) RETURNS INT DETERMINISTIC BEGIN DECLARE s1_len, s2_len, i, j, c, c_temp, cost INT; DECLARE s1_char CHAR; -- max strlen=255 DECLARE cv0, cv1 VARBINARY(256); SET s1_len = CHAR_LENGTH(s1), s2_len = CHAR_LENGTH(s2), cv1 = 0x00, j = 1, i = 1, c = 0; IF s1 = s2 THEN RETURN 0; ELSEIF s1_len = 0 THEN RETURN s2_len; ELSEIF s2_len = 0 THEN RETURN s1_len; ELSE WHILE j <= s2_len DO SET cv1 = CONCAT(cv1, UNHEX(HEX(j))), j = j + 1; END WHILE; WHILE i <= s1_len DO SET s1_char = SUBSTRING(s1, i, 1), c = i, cv0 = UNHEX(HEX(i)), j = 1; WHILE j <= s2_len DO SET c = c + 1; IF s1_char = SUBSTRING(s2, j, 1) THEN SET cost = 0; ELSE SET cost = 1; END IF; SET c_temp = CONV(HEX(SUBSTRING(cv1, j, 1)), 16, 10) + cost; IF c > c_temp THEN SET c = c_temp; END IF; SET c_temp = CONV(HEX(SUBSTRING(cv1, j+1, 1)), 16, 10) + 1; IF c > c_temp THEN SET c = c_temp; END IF; SET cv0 = CONCAT(cv0, UNHEX(HEX(c))), j = j + 1; END WHILE; SET cv1 = cv0, i = i + 1; END WHILE; END IF; RETURN c; END;  ## Objective-C - (NSInteger)computeLevenshteinDistanceFrom:(NSString *)source to:(NSString *)target { NSUInteger sourceLength = [source length] + 1; NSUInteger targetLength = [target length] + 1; NSMutableArray *cost = [NSMutableArray new]; NSMutableArray *newCost = [NSMutableArray new]; for (NSUInteger i = 0; i < sourceLength; i++) { cost[i] = @(i); } for (NSUInteger j = 1; j < targetLength; j++) { newCost[0] = @(j - 1); for (NSUInteger i = 1; i < sourceLength; i++) { NSInteger match = ([source characterAtIndex:i - 1] == [target characterAtIndex:j - 1]) ? 0 : 1; NSInteger costReplace = [cost[i - 1] integerValue] + match; NSInteger costInsert = [cost[i] integerValue] + 1; NSInteger costDelete = [newCost[i - 1] integerValue] + 1; newCost[i] = @(MIN(MIN(costInsert, costDelete), costReplace)); } NSMutableArray *swap = cost; cost = newCost; newCost = swap; } return [cost[sourceLength - 1] integerValue]; }  Or with extensions "and C flavor" #define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c))) @implementation NSString (LevensteinDistance) - (NSUInteger)distance:(NSString *)query { const char *selfCharArray = [self UTF8String]; const char *queryCharArray = [query UTF8String]; NSUInteger x, y; NSUInteger selfLength = strlen(selfCharArray); NSUInteger queryLength = strlen(queryCharArray); NSUInteger matrix[queryLength + 1][selfLength + 1]; matrix[0][0] = 0; for (x = 1; x <= queryLength; x++) { matrix[x][0] = matrix[x - 1][0] + 1; } for (y = 1; y <= selfLength; y++) { matrix[0][y] = matrix[0][y - 1] + 1; } for (x = 1; x <= queryLength; x++) { for (y = 1; y <= selfLength; y++) { matrix[x][y] = MIN3(matrix[x - 1][y] + 1, matrix[x][y - 1] + 1, matrix[x - 1][y - 1] + (selfCharArray[y - 1] == queryCharArray[x - 1] ? 0 : 1)); } } return (matrix[queryLength][selfLength]); } @end  ## OCaml (* Minimum of three integers. This function is deliberately * not polymorphic because (1) we only need to compare integers * and (2) the OCaml compilers do not perform type specialization * for user-defined functions. *) let minimum (x:int) y z = let m' (a:int) b = if a < b then a else b in m' (m' x y) z (* Matrix initialization. *) let init_matrix n m = let init_col = Array.init m in Array.init n (function | 0 -> init_col (function j -> j) | i -> init_col (function 0 -> i | _ -> 0) ) (* Computes the Levenshtein distance between two unistring. * If you want to run it faster, add the -unsafe option when * compiling or use Array.unsafe_* functions (but be carefull * with these well-named unsafe features). *) let distance_utf8 x y = match Array.length x, Array.length y with | 0, n -> n | m, 0 -> m | m, n -> let matrix = init_matrix (m + 1) (n + 1) in for i = 1 to m do let s = matrix.(i) and t = matrix.(i - 1) in for j = 1 to n do let cost = abs (compare x.(i - 1) y.(j - 1)) in s.(j) <- minimum (t.(j) + 1) (s.(j - 1) + 1) (t.(j - 1) + cost) done done; matrix.(m).(n) (* This function takes two strings, convert them to unistring (int array) * and then call distance_utf8, so we can compare utf8 strings. Please * note that you need Glib (see LablGTK). *) let distance x y = distance_utf8 (Glib.Utf8.to_unistring x) (Glib.Utf8.to_unistring y)  ## Octave And MATLAB function [dist,L]=levenshtein_distance(str1,str2) L1=length(str1)+1; L2=length(str2)+1; L=zeros(L1,L2); g=+1;%just constant m=+0;%match is cheaper, we seek to minimize d=+1;%not-a-match is more costly. %do BC's L(:,1)=([0:L1-1]*g)'; L(1,:)=[0:L2-1]*g; m4=0;%loop invariant for idx=2:L1; for idy=2:L2 if(str1(idx-1)==str2(idy-1)) score=m; else score=d; end m1=L(idx-1,idy-1) + score; m2=L(idx-1,idy) + g; m3=L(idx,idy-1) + g; L(idx,idy)=min(m1,min(m2,m3)); end end dist=L(L1,L2); return end  %I think this function generates errors: %>> levenshtein('aaa','ab') % ans = % 1 %correct answer here is 2, I believe: 1 deletion, 1 replacement? function q = levenshtein(s1,s2) d = toeplitz(find(s1),find(s2))-1; for j = 1:numel(s2)-1 for i = 1:numel(s1)-1 d(i+1,j+1) = min(min(d(i,j+1),d(i+1,j)) + 1,d(i,j) + ne(s1(i), s2(j))); end end q = d(end) + eq(i,j)*ne(s1(end),s2(end)); % check if last chars are different  ## Perl use List::Util qw(min); sub levenshtein { my ($str1, $str2) = @_; my @ar1 = split //,$str1;
my @ar2 = split //, $str2; my @dist;$dist[$_][0] =$_ foreach (0 .. @ar1);
$dist[0][$_] = $_ foreach (0 .. @ar2); foreach my$i (1 .. @ar1){
foreach my $j (1 .. @ar2){ my$cost = $ar1[$i - 1] eq $ar2[$j - 1] ? 0 : 1;
$dist[$i][$j] = min($dist[$i - 1][$j] + 1,
$dist[$i][$j - 1] + 1,$dist[$i - 1][$j - 1] + $cost ); } } return$dist[@ar1][@ar2];
}


## Phix

function levenshtein(string a, b)
sequence costs = tagset(length(b)+1,0)
for i=1 to length(a) do
costs[1] = i
integer newcost = i-1, pj = i, cj
for j=1 to length(b) do
cj = costs[j+1]
pj = min({pj+1, cj+1, newcost+(a[i]!=b[j])})
{costs[j+1],newcost} = {pj,cj}
end for
end for
return costs[$-1] end function ## PHP Please note that there is a standard library call levenshtein()[1] in PHP as of version 4.0.1. It is limited to comparing strings of no more than 255 characters in length, however, limiting its utility. function leven($s1, $s2){$l1 = strlen($s1); // Length of string$s1
$l2 = strlen($s2);                    // Length of $s2$dis = range(0, $l2); // Array with (0,1,2,...,n) for ($x = 1; $x <=$l1; $x++) {$dis_new[0] = $x; for ($y = 1; $y <=$l2; $y++) {$c = ($s1[$x - 1] == $s2[$y - 1]) ? 0 : 1;
$dis_new[$y] = min($dis[$y] + 1, $dis_new[$y - 1] + 1, $dis[$y - 1] + $c); }$dis = $dis_new; } return$dis[l2]; }  ## plpgsql Copied the PHP version using a single dimensional array CREATE OR REPLACE FUNCTION levenshtein(s text, t text) RETURNS integer AS $$DECLARE i integer; DECLARE j integer; DECLARE m integer; DECLARE n integer; DECLARE d integer[]; DECLARE c integer; BEGIN m := char_length(s); n := char_length(t); i := 0; j := 0; FOR i IN 0..m LOOP d[i*(n+1)] = i; END LOOP; FOR j IN 0..n LOOP d[j] = j; END LOOP; FOR i IN 1..m LOOP FOR j IN 1..n LOOP IF SUBSTRING(s,i,1) = SUBSTRING(t, j,1) THEN c := 0; ELSE c := 1; END IF; d[i*(n+1)+j] := LEAST(d[(i-1)*(n+1)+j]+1, d[i*(n+1)+j-1]+1, d[(i-1)*(n+1)+j-1]+c); END LOOP; END LOOP; return d[m*(n+1)+n]; END;$$ LANGUAGE plpgsql IMMUTABLE  ## Python The first version is a Dynamic Programming algorithm, with the added optimization that only the last two rows of the dynamic programming matrix are needed for the computation: def levenshtein(s1, s2): if len(s1) < len(s2): return levenshtein(s2, s1) # len(s1) >= len(s2) if len(s2) == 0: return len(s1) previous_row = range(len(s2) + 1) for i, c1 in enumerate(s1): current_row = [i + 1] for j, c2 in enumerate(s2): insertions = previous_row[j + 1] + 1 # j+1 instead of j since previous_row and current_row are one character longer deletions = current_row[j] + 1 # than s2 substitutions = previous_row[j] + (c1 != c2) current_row.append(min(insertions, deletions, substitutions)) previous_row = current_row return previous_row[-1]  Second version: def lev(a, b): if not a: return len(b) if not b: return len(a) return min(lev(a[1:], b[1:])+(a[0] != b[0]), lev(a[1:], b)+1, lev(a, b[1:])+1)  (Note that while very compact, the runtime of this implementation is really poor, making it unusable in practical usage.) Third version (works): def LD(s,t): s = ' ' + s t = ' ' + t d = {} S = len(s) T = len(t) for i in range(S): d[i, 0] = i for j in range (T): d[0, j] = j for j in range(1,T): for i in range(1,S): if s[i] == t[j]: d[i, j] = d[i-1, j-1] else: d[i, j] = min(d[i-1, j], d[i, j-1], d[i-1, j-1]) + 1 return d[S-1, T-1]  (Note that while compact, the runtime of this implementation is relatively poor.) 4th version: def levenshtein(seq1, seq2): oneago = None thisrow = range(1, len(seq2) + 1) + [0] for x in xrange(len(seq1)): twoago, oneago, thisrow = oneago, thisrow, [0] * len(seq2) + [x + 1] for y in xrange(len(seq2)): delcost = oneago[y] + 1 addcost = thisrow[y - 1] + 1 subcost = oneago[y - 1] + (seq1[x] != seq2[y]) thisrow[y] = min(delcost, addcost, subcost) return thisrow[len(seq2) - 1]  (Note this implementation is O(N*M) time and O(M) space, for N and M the lengths of the two sequences.) 5th, a vectorized version of the 1st, using NumPy. About 40% faster, on my test case. However, this numpy vectorized Python version incorrectly returns 5 for levenshtein('aabcc', 'bccdd') (which should be 4 for two 'a' insertions + 2 'd' deletions). The reason is that the deletion step np.minimum(current_row[1:], current_row[0:-1] + 1) does not handle the case that the minimum for current_row[j] could be current_row[j-2] + 2 due to two consecutive deletions as it is in this example. See Discussion. def levenshtein(source, target): if len(source) < len(target): return levenshtein(target, source) # So now we have len(source) >= len(target). if len(target) == 0: return len(source) # We call tuple() to force strings to be used as sequences # ('c', 'a', 't', 's') - numpy uses them as values by default. source = np.array(tuple(source)) target = np.array(tuple(target)) # We use a dynamic programming algorithm, but with the # added optimization that we only need the last two rows # of the matrix. previous_row = np.arange(target.size + 1) for s in source: # Insertion (target grows longer than source): current_row = previous_row + 1 # Substitution or matching: # Target and source items are aligned, and either # are different (cost of 1), or are the same (cost of 0). current_row[1:] = np.minimum( current_row[1:], np.add(previous_row[:-1], target != s)) # Deletion (target grows shorter than source): current_row[1:] = np.minimum( current_row[1:], current_row[0:-1] + 1) previous_row = current_row return previous_row[-1]  (Note this implementation only works if the weight does not depend on the character edited.) 6th version, from Wikipedia article on Levenshtein Distance; Iterative with two matrix rows. # Christopher P. Matthews # christophermatthews1985@gmail.com # Sacramento, CA, USA def levenshtein(s, t): ''' From Wikipedia article; Iterative with two matrix rows. ''' if s == t: return 0 elif len(s) == 0: return len(t) elif len(t) == 0: return len(s) v0 = [None] * (len(t) + 1) v1 = [None] * (len(t) + 1) for i in range(len(v0)): v0[i] = i for i in range(len(s)): v1[0] = i + 1 for j in range(len(t)): cost = 0 if s[i] == t[j] else 1 v1[j + 1] = min(v1[j] + 1, v0[j + 1] + 1, v0[j] + cost) for j in range(len(v0)): v0[j] = v1[j] return v1[len(t)]  ## R/S+ function (str1, str2) { if (typeof(str1) != "character" && class(str1) != "factor") stop(sprintf("Illegal data type: %s", typeof(str1))) if (class(str1) == "factor") str = as.character(str1) if (typeof(str2) != "character" && class(str2) != "factor") stop(sprintf("Illegal data type: %s", typeof(str2))) if (class(str2) == "factor") str = as.character(str2) if ((is.array(str1) || is.array(str2)) && dim(str1) != dim(str2)) stop("non-conformable arrays") if (length(str1) == 0 || length(str2) == 0) return(integer(0)) l1 <- length(str1) l2 <- length(str2) out <- .C("levenshtein", as.character(str1), as.character(str2), l1, l2, ans = integer(max(l1, l2)), PACKAGE = "RecordLinkage") if (any(is.na(str1), is.na(str2))) outans[is.na(str1) | is.na(str2)] = NA
if (is.array(str1))
return(array(out$ans, dim(str1))) if (is.array(str2)) return(array(out$ans, dim(str2)))
return(out$ans) }  "(Note) this is just one of many implementations of the Levenshtein Distance, but I've not been able to get the others to work ## rexx /* rexx levenshtein calculates the edit distance Karlocai 2009-01-18 between 2 strings s and t This implementation of the Levenshtein algorithm uses one row only (0..n), containing - values of the previous line in columns [i-1]..n and - values of the current line in columns 1..[i-2] The current left value is kept in the variable lc i: column 1..n of s, 1st index j: row 1..m of t, 2nd index */ Parse Arg s,t -- gets 2 strings as parameter n = Length(s) -- checks the parameters m = Length(t) If n = 0 Then Return m If m = 0 Then Return n Do i = 0 To n -- initializes the 1st row r.i = i End Do j = 1 To m -- for each row lc = j -- left column start value Do i = 1 To n -- for each column nv = Min(r.i + 1, , lc + 1, , r.[i-1] + (Substr(s,i,1) <> Substr(t,j,1))) r.[i-1] = lc -- sets previous left column lc = nv -- current left column End r.n = lc -- sets last current value End Return r.n  ## Ruby This Ruby version is simple, but extremely slow, though it works with any Array with elements that implement '=='. def levenshtein(first:, second:) case when first.empty? then second.length when second.empty? then first.length else [(first[0] == second[0] ? 0 : 1) + levenshtein(first: first[1..-1], second: second[1..-1]), 1 + levenshtein(first: first[1..-1], second: second), 1 + levenshtein(first: first, second: second[1..-1])].min end end  This memoized version is significantly faster def levenshtein(first:, second:) matrix = [(0..first.length).to_a] (1..second.length).each do |j| matrix << [j] + [0] * (first.length) end (1..second.length).each do |i| (1..first.length).each do |j| if first[j-1] == second[i-1] matrix[i][j] = matrix[i-1][j-1] else matrix[i][j] = [ matrix[i-1][j], matrix[i][j-1], matrix[i-1][j-1], ].min + 1 end end end return matrix.last.last end  And another memoized version def levenshtein(first:, second:) m, n = first.length, second.length return m if n == 0 return n if m == 0 # Create our distance matrix d = Array.new(m+1) {Array.new(n+1)} 0.upto(m) { |i| d[i][0] = i } 0.upto(n) { |j| d[0][j] = j } 1.upto(n) do |j| 1.upto(m) do |i| d[i][j] = first[i-1] == second[j-1] ? d[i-1][j-1] : [d[i-1][j]+1,d[i][j-1]+1,d[i-1][j-1]+1,].min end end d[m][n] end  A faster implementation for the Levenshtein distance of strings (using C bindings) is available here ## Rust A simple implementation in Rust (beta2), transposed from the C version. It is a little more general than the bare minimum, in fact it accepts any type that can be converted to a string. fn levenshtein_d1stance<T>(s1: &T, s2: &T) -> usize where T: ToString { let v1: Vec<char> = s1.to_string().chars().collect(); let v2: Vec<char> = s2.to_string().chars().collect(); let v1len = v1.len(); let v2len = v2.len(); // Early exit if one of the strings is empty if v1len == 0 { return v2len; } if v2len == 0 { return v1len; } fn min3<T: Ord>(v1: T, v2: T, v3: T) -> T{ cmp::min(v1, cmp::min(v2, v3)) } fn delta(x: char, y: char) -> usize { if x == y { 0 } else { 1 } } let mut column: Vec<usize> = (0..v1len+1).collect(); for x in 1..v2len+1 { column[0] = x; let mut lastdiag = x-1; for y in 1..v1len+1 { let olddiag = column[y]; column[y] = min3(column[y] + 1, column[y-1] + 1, lastdiag + delta(v1[y-1], v2[x-1])); lastdiag = olddiag; } } column[v1len] }  You can test it live on Rust Playpen. ## Scala ### Imperative version A functional version would likely be far more concise. def levenshtein(str1: String, str2: String): Int = { val lenStr1 = str1.length val lenStr2 = str2.length val d: Array[Array[Int]] = Array.ofDim(lenStr1 + 1, lenStr2 + 1) for (i <- 0 to lenStr1) d(i)(0) = i for (j <- 0 to lenStr2) d(0)(j) = j for (i <- 1 to lenStr1; j <- 1 to lenStr2) { val cost = if (str1(i - 1) == str2(j - 1)) 0 else 1 d(i)(j) = min( d(i-1)(j ) + 1, // deletion d(i )(j-1) + 1, // insertion d(i-1)(j-1) + cost // substitution ) } d(lenStr1)(lenStr2) } def min(nums: Int*): Int = nums.min  ### Functional version import scala.collection.mutable import scala.collection.parallel.ParSeq def levenshtein(s1: String, s2: String): Int = { val memorizedCosts = mutable.Map[(Int, Int), Int]() def lev: ((Int, Int)) => Int = { case (k1, k2) => memorizedCosts.getOrElseUpdate((k1, k2), (k1, k2) match { case (i, 0) => i case (0, j) => j case (i, j) => ParSeq(1 + lev((i - 1, j)), 1 + lev((i, j - 1)), lev((i - 1, j - 1)) + (if (s1(i - 1) != s2(j - 1)) 1 else 0)).min }) } lev((s1.length, s2.length)) }  ## Scheme ;; using the match' macro as defined in http://synthcode.com/scheme/match.scm (define/memoized (edit-distance a b) (match (,a ,b) ((,a ()) (length a)) ((() ,b) (length b)) (((,a0 . ,a*) (,b0 . ,b*)) (min (+ (edit-distance a* b) 1) (+ (edit-distance a b*) 1) (+ (edit-distance a* b*) (if (equal? a0 b0) 0 2)))))) ;; where the define/memoized' special form can be defined in the following way: (define (memoized proc) (let ((cache (make-hash-table))) (lambda args (match (hash-get-handle cache args) ((,key . ,memoized-result) (apply values memoized-result)) (_ (call-with-values (lambda () (apply proc args)) (lambda result (hash-set! cache args result) (apply values result)))))))) (define-syntax-rule (define/memoized (name . args) . body) (define name (memoized (lambda args . body))))  ## Smalltalk levenshteinDistanceBetween: stringOne and: stringTwo "Iterative calculation of the Levenshtein distance between two strings." | arrayTwo arrayOne| " degenerate cases" stringTwo = stringOne ifTrue:[^0]. (stringTwo size) = 0 ifTrue:[^(stringOne size)]. (stringOne size) = 0 ifTrue:[^(stringTwo size)]. "create two work vectors of integer distances" arrayOne := Array new:((stringTwo size) + 1). arrayTwo := Array new:((stringTwo size) + 1). "initialize v0 (the previous row of distances) this row is A[0][i]: edit distance for an empty s the distance is just the number of characters to delete from t" (1 to: (arrayOne size)) do:[ :i | arrayOne at: i put: i-1 ]. (1 to: (stringOne size)) do: [ : i | " calculate v1 (current row distances) from the previous row v0 first element of v1 is A[i+1][0] edit distance is delete (i+1) chars from s to match empty t" arrayTwo at: 1 put: i. " use formula to fill in the rest of the row" (1 to: (stringTwo size)) do: [ :j || cost minimum minimumAux | ((stringOne at: i) = (stringTwo at: j)) ifTrue:[cost:=0] ifFalse:[cost:=1]. minimumAux := ((arrayTwo at: j) + 1) min: ((arrayOne at: (j+1)) + 1). minimum := minimumAux min: ((arrayOne at: j) + cost). arrayTwo at: (j+1) put: minimum.]. (1 to: (arrayOne size)) do: [ :j | arrayOne at: j put: (arrayTwo at: j) ]. ]. ^arrayTwo at: ((stringTwo size)+1).  ## Swift /** * Levenshtein edit distance calculator * * Inspired by https://gist.github.com/bgreenlee/52d93a1d8fa1b8c1f38b * Improved with http://stackoverflow.com/questions/26990394/slow-swift-arrays-and-strings-performance */ class Levenshtein { private class func min(numbers: Int...) -> Int { return numbers.reduce(numbers[0], combine: {$0 < $1 ?$0 : $1}) } class Array2D { var cols:Int, rows:Int var matrix: [Int] init(cols:Int, rows:Int) { self.cols = cols self.rows = rows matrix = Array(count:cols*rows, repeatedValue:0) } subscript(col:Int, row:Int) -> Int { get { return matrix[cols * row + col] } set { matrix[cols*row+col] = newValue } } func colCount() -> Int { return self.cols } func rowCount() -> Int { return self.rows } } class func distanceBetween(aStr: String, and bStr: String) -> Int { let a = Array(aStr.utf16) let b = Array(bStr.utf16) let dist = Array2D(cols: a.count + 1, rows: b.count + 1) for i in 1...a.count { dist[i, 0] = i } for j in 1...b.count { dist[0, j] = j } for i in 1...a.count { for j in 1...b.count { if a[i-1] == b[j-1] { dist[i, j] = dist[i-1, j-1] // noop } else { dist[i, j] = min( dist[i-1, j] + 1, // deletion dist[i, j-1] + 1, // insertion dist[i-1, j-1] + 1 // substitution ) } } } return dist[a.count, b.count] } }  ## Turing %find the levenshtein distance function LD (s, t : string) : int % a couple variables var cell_above := 0 var cell_left := 0 var cell_diagonal := 0 var cost := 0 %lengths var n := length (s) var m := length (t) %if a string is zero if n = 0 then result m elsif m = 0 then result n else end if % make an array with both words var matrix : array 0 .. n, 0 .. m of int % initialize array to the length of each word for i : 0 .. n matrix (i, 0) := i end for for i : 0 .. m matrix (0, i) := i end for % calculate total changes for i : 1 .. n for j : 1 .. m if s (i) = t (j) then cost := 0 else cost := 1 end if cell_above := matrix (i - 1, j) + 1 cell_left := matrix (i, j - 1) + 1 cell_diagonal := matrix (i - 1, j - 1) + cost var mini := min (cell_above, cell_left) matrix (i, j) := min (mini, cell_diagonal) end for end for result matrix (n, m) end LD put LD ("s", "t") ## VBScript This version is identical to JavaScript and PHP implementations in this article. Function levenshtein( a, b ) Dim i,j,cost,d,min1,min2,min3 ' Avoid calculations where there there are empty words If Len( a ) = 0 Then levenshtein = Len( b ): Exit Function If Len( b ) = 0 Then levenshtein = Len( a ): Exit Function ' Array initialization ReDim d( Len( a ), Len( b ) ) For i = 0 To Len( a ): d( i, 0 ) = i: Next For j = 0 To Len( b ): d( 0, j ) = j: Next ' Actual calculation For i = 1 To Len( a ) For j = 1 To Len( b ) If Mid(a, i, 1) = Mid(b, j, 1) Then cost = 0 Else cost = 1 End If ' Since min() function is not a part of VBScript, we'll "emulate" it below min1 = ( d( i - 1, j ) + 1 ) min2 = ( d( i, j - 1 ) + 1 ) min3 = ( d( i - 1, j - 1 ) + cost ) If min1 <= min2 And min1 <= min3 Then d( i, j ) = min1 ElseIf min2 <= min1 And min2 <= min3 Then d( i, j ) = min2 Else d( i, j ) = min3 End If Next Next levenshtein = d( Len( a ), Len( b ) ) End Function  ## Visual Basic for Applications (no Damerau extension) This version is identical to JavaScript and PHP implementations in this article. I had problems when I tried to use the other VBA implementation in this article, so I had to adopt the version below. Application.WorksheetFunction.Min() method is Excel-specific. If you implement it with other VBA-enabled applications, uncomment the conditional block and comment out the Application.WorksheetFunction.Min() line. Function levenshtein(a As String, b As String) As Integer Dim i As Integer Dim j As Integer Dim cost As Integer Dim d(,) As Integer Dim min1 As Integer Dim min2 As Integer Dim min3 As Integer If Len( a ) = 0 Then levenshtein = Len( b ) Exit Function End If If Len( b ) = 0 Then levenshtein = Len( a ) Exit Function End If ReDim d(Len(a), Len(b)) For i = 0 To Len(a) d(i, 0) = i Next For j = 0 To Len(b) d(0, j) = j Next For i = 1 To Len(a) For j = 1 To Len(b) If Mid(a, i, 1) = Mid(b, j, 1) Then cost = 0 Else cost = 1 End If ' Since Min() function is not a part of VBA, we'll "emulate" it below min1 = (d(i - 1, j) + 1) min2 = (d(i, j - 1) + 1) min3 = (d(i - 1, j - 1) + cost) ' If min1 <= min2 And min1 <= min3 Then ' d(i, j) = min1 ' ElseIf min2 <= min1 And min2 <= min3 Then ' d(i, j) = min2 ' Else ' d(i, j) = min3 ' End If ' In Excel we can use Min() function that is included ' as a method of WorksheetFunction object d(i, j) = Application.WorksheetFunction.Min(min1, min2, min3) Next Next levenshtein = d(Len(a), Len(b)) End Function  ## MapBasic This version is identical to VB implementations in this article. type twoDimArrayType secondArray() as integer end type dim FirstArray() as twoDimArrayType Dim i As Integer Dim j As Integer Dim cost As Integer Dim d() As Integer Dim min1 As Integer Dim min2 As Integer Dim min3 As Integer declare Function calculateLevenshteinDistance(byval firstString As String, byval secondString As String) As Integer Function calculateLevenshteinDistance(byval firstString As String, byval secondString As String) As Integer print chr$(12)
'If one of the parameter is null, then the result will be the length of the other parameter...
if Len(firstString) = 0 Then
calculateLevenshteinDistance = Len(secondString)
exit Function
end if
if Len(secondString) = 0 Then
calculateLevenshteinDistance = Len(firstString)
exit Function
end if

'Initializing array...
redim FirstArray(len(firstString)+1)
for i=1 to ubound(FirstArray)
redim FirstArray(i).secondArray(len(secondString)+1)
next

'Deletion...
For i = 1 To ubound(FirstArray)
FirstArray(i).secondArray(1) = i-1
Next

'Insertion ...
For i = 1 To ubound(FirstArray(ubound(FirstArray)).secondArray)
FirstArray(1).secondArray(i) = i-1
Next

'Actual calculation...
for i=2 to ubound(FirstArray)
for j=2 to ubound(FirstArray(i).secondArray)
If StringCompare(Mid$(firstString, i-1, 1), Mid$(secondString, j-1, 1))=0  Then
cost = 0
Else
cost = 1
End If
min1 =  FirstArray(i-1).secondArray(j) + 1
min2 =  FirstArray(i).secondArray(j-1) + 1
min3 =  FirstArray(i-1).secondArray(j-1) + cost
If min1 <= min2 And min1 <= min3 Then
FirstArray(i).secondArray(j) = min1
ElseIf min2 <= min1 And min2 <= min3 Then
FirstArray(i).secondArray(j) = min2
Else
FirstArray(i).secondArray(j) = min3
End If
Next
Next

'Calculating Return Value...
calculateLevenshteinDistance = FirstArray(ubound(FirstArray)).secondArray(ubound(FirstArray(ubound(FirstArray)).secondArray))
End Function


## Teslock

This is the Levenshtein distance calculation in Teslock Machine Language.

.declare singlecall virtual LevenshteinDistance[args(2) string s1, string s2]: out unsigned int
main
(
define unsigned int constant "cost_ins" == 1;
define unsigned int constant "cost_del" == 1;
define unsigned int constant "cost_sub" == 1;

define unsigned int variable "n1" == calculate::string_operations>length("s1");
define unsigned int variable "n2" == calculate::string_operations>length("s2");

define unsigned int_array(calculate::string_operations>array_instantiation("p")>fixed_length("n2", ++1));
define unsigned int_array(calculate::string_operations>array_instantiation("q")>fixed_length("n2", ++1));
define unsigned int_array(calculate::string_operations>array_instantiation("r")>variable_length);

p>array_vector>0 == 0;
loop_for>finalized(define unsigned int variable "j" == 1)>break_condition("j" <= "n2")>forward_condition(++j)
(
p>array_vector>j == p>array_vector>j::access>+constants::cost_ins;
)

q>array_vector>0 == 0;
loop_for>finalized(define unsigned int variable "i" == 1)>break_condition("i" <= "n1")>forward_condition(++i)
(
q>array_vector>0 == p>array_vector>0::access>+constants::cost_del;

loop for>finalized(define unsigned int variable "j" == 1)>break_condition("j" <= "n2")>forward_condition(++j)
(
define unsigned int variable "d_del" == p>array_vector>j::access>+constants::cost_del;
define unsigned int variable "d_ins" == q>array_vector>j[delegate_handle::j::access>-1]>+constants::cost_ins;
define unsigned int veriable "d_sub" == p>array_vector>j[delegate_handle::j::access>-1]>+logical_operations>xor_result["s1"::access>"s1"[delegate_handle::j::access>-1]?0>return_handle_as_result constants::"cost_sub"];
q>array_vector>j == dll_extern::math_interop_singlecall(min)::[args(3) (d_del, d_ins), d_sub;
)
local>"r" == "p"(self_typecast::ignore_unsafe_condition);
local>"p" == "q"(self_typecast);
local>"q" == "r"(self_typecast);
)

logical_result(param p::singlecall)::define>"return" == p>array_vector>"n2";
)



## Abap

Please note that there is a standard ABAP function distance which calculates the Levenshtein distance without need for the below coding.

REPORT  zlevenshtein.
*----------------------------------------------------------------------*
*       CLASS lcl_levenshtein DEFINITION
*----------------------------------------------------------------------*
*
*----------------------------------------------------------------------*
CLASS lcl_levenshtein DEFINITION.
PUBLIC SECTION.
CLASS-METHODS:
distance IMPORTING i_s TYPE csequence
i_t TYPE csequence
RETURNING value(r) TYPE i.
ENDCLASS.                    "lcl_c DEFINITION
*----------------------------------------------------------------------*
*       CLASS lcl_levenshtein IMPLEMENTATION
*----------------------------------------------------------------------*
*
*----------------------------------------------------------------------*
CLASS lcl_levenshtein IMPLEMENTATION.
METHOD distance.
DEFINE m_get.
l_m_index = ( ( l_l_t * ( l_m_i + ( &2 ) ) ) + l_m_j + ( &1 ) ) + 1 .
read table l_d into r index l_m_index.
insert r into table l_v.
END-OF-DEFINITION.
DATA: l_d       TYPE STANDARD TABLE OF i,
l_v       TYPE SORTED TABLE OF i WITH UNIQUE KEY table_line,
l_cost    TYPE i,
l_m_i     TYPE i,
l_m_j     TYPE i,
l_m_index TYPE i,
l_l_s     TYPE i,
l_l_t     TYPE i.

l_l_s = STRLEN( i_s ).
l_l_t = STRLEN( i_t ).

DO l_l_s TIMES.
l_m_i = sy-index - 1.

DO l_l_t TIMES. "#EC CI_NESTED
l_m_j = sy-index - 1.

IF i_s+l_m_i(1) = i_t+l_m_j(1).
l_cost = 0.
ELSE.
l_cost = 1.
ENDIF.
IF l_m_j = 0.
r = l_cost.
ELSEIF l_m_i = 0.
r = l_cost.
ELSE.

CLEAR l_v.
m_get: -1  0 1, 0 -1 1, -1 -1 l_cost.
READ TABLE l_v INTO r INDEX 1.
ENDIF.
APPEND r TO l_d.

ENDDO.
ENDDO.
ENDMETHOD.                    "distance
ENDCLASS.                    "lcl_levenshtein IMPLEMENTATION

START-OF-SELECTION.
DATA: d TYPE i.

d = lcl_levenshtein=>distance( i_s = 'sitting' i_t = 'kitten' ).
WRITE: / d.


## Pick Basic

       IF STRING1 = STRING2 THEN
LD = 0
END ELSE
S.LEN = LEN(STRING1)
C.LEN = LEN(STRING2)
MAT LD.MTX = ''
DIM LD.MTX(100,100)
FOR I = 3 TO S.LEN + 2
LD.MTX(I,1) = STRING1[I-2,1]
NEXT I
FOR I = 3 TO S.LEN + 2
LD.MTX(I,2) = I - 2
NEXT I
FOR I = 3 TO C.LEN + 2
LD.MTX(1,I) = STRING2[I-2,1]
NEXT I
LD.MTX(2,2) = 0
FOR I = 3 TO C.LEN + 2
LD.MTX(2,I) = I - 2
NEXT I
FOR I = 3 TO (S.LEN+2)
S.LETTER = LD.MTX(I,1)
FOR J = 3 TO (C.LEN+2)
C.LETTER = LD.MTX(1,J)
IF C.LETTER = S.LETTER THEN COST = 0 ELSE COST = 1
P1 = LD.MTX(I-1,J) + 1
P2 = LD.MTX(I,J-1) + 1
P3 = LD.MTX(I-1,J-1) + COST
IF P1 < P2 THEN LD.NUM = P1 ELSE LD.NUM = P2
IF P3 < LD.NUM THEN LD.NUM = P3
LD.MTX(I,J) = LD.NUM
NEXT J
NEXT I
LD = LD.MTX(S.LEN+2,C.LEN+2)
END
`