// blob: 22db4fe70e7579719ab06a504cc52197e3d018b9 [file] [log] [blame]
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "edit_distance.h"

#include <algorithm>
#include <vector>
// Computes the edit distance between |s1| and |s2|: the minimum number of
// single-character edits needed to turn one string into the other.
//
// allow_replacements: when true, substituting one character for another
//   counts as a single edit (Levenshtein distance).  When false, only
//   insertions and deletions are counted, so a mismatching character
//   costs a deletion plus an insertion.
// max_edit_distance: when non-zero, the computation stops as soon as the
//   distance is known to exceed this bound and max_edit_distance + 1 is
//   returned instead of the exact distance.  Zero disables the cutoff.
//
// Returns the edit distance, or max_edit_distance + 1 if the cutoff fired.
int EditDistance(const StringPiece& s1,
                 const StringPiece& s2,
                 bool allow_replacements,
                 int max_edit_distance) {
  // The algorithm implemented below is the "classic"
  // dynamic-programming algorithm for computing the Levenshtein
  // distance, which is described here:
  //
  //   http://en.wikipedia.org/wiki/Levenshtein_distance
  //
  // Although the algorithm is typically described using an m x n
  // array, only two rows are used at a time, so this implementation
  // just keeps two separate vectors for those two rows.
  const int m = static_cast<int>(s1.len_);
  const int n = static_cast<int>(s2.len_);

  std::vector<int> previous(n + 1);
  std::vector<int> current(n + 1);

  // Row 0: turning the empty prefix of s1 into the first i characters of
  // s2 takes i insertions.
  for (int i = 0; i <= n; ++i)
    previous[i] = i;

  for (int y = 1; y <= m; ++y) {
    // Column 0: turning the first y characters of s1 into the empty
    // string takes y deletions.
    current[0] = y;
    int best_this_row = current[0];

    for (int x = 1; x <= n; ++x) {
      const bool same_char = s1.str_[y - 1] == s2.str_[x - 1];
      if (allow_replacements) {
        // Diagonal step is free on a match and costs one replacement
        // otherwise; compare it against an insertion or a deletion.
        current[x] = std::min(previous[x - 1] + (same_char ? 0 : 1),
                              std::min(current[x - 1], previous[x]) + 1);
      } else if (same_char) {
        current[x] = previous[x - 1];
      } else {
        // Without replacements a mismatch can only be resolved by an
        // insertion or a deletion.
        current[x] = std::min(current[x - 1], previous[x]) + 1;
      }
      best_this_row = std::min(best_this_row, current[x]);
    }

    // Every cell of a later row is derived from this row (or from its own
    // column 0, which only grows) by adding a non-negative cost, so once
    // the whole row exceeds the bound the final answer must as well.
    if (max_edit_distance && best_this_row > max_edit_distance)
      return max_edit_distance + 1;

    current.swap(previous);
  }

  // After the final swap the last computed row lives in |previous|.
  return previous[n];
}