Apache Commons LevenshteinDistanceLevenshteinDistance(final Integer threshold) 如果阈值不为空,则距离计算将限制为最大长度。
如果阈值不为空,则距离计算将受到限制 到最大长度。
如果阈值为 null,则算法的无限版本将 被使用。
LevenshteinDistance() 方法是一个构造函数。
来自LevenshteinDistance 的方法 LevenshteinDistance() 声明为:
复制
public LevenshteinDistance(final Integer threshold)
参数
LevenshteinDistance() 方法具有以下参数:
以下代码演示如何使用 Apache CommonsLevenshteinDistance LevenshteinDistance(final Integer threshold)
例 1
复制
import org.apache.commons.text.*;
import org.apache.commons.text.diff.*;
import org.apache.commons.text.similarity.*;
import org.apache.commons.text.translate.*;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
class ShowVisitor<Character> implements CommandVisitor<Character> {
private int inserts = 0;
private int keeps = 0;
private int deletes = 0;
public void visitInsertCommand(Character character) {
++inserts;// w w w . d e m o 2 s . c o m
System.out.println(String.format("insert %s", character));
}
public void visitKeepCommand(Character character) {
++keeps;
System.out.println(String.format("keep %s", character));
}
public void visitDeleteCommand(Character character) {
++deletes;
System.out.println(String.format("delete %s", character));
}
public void printStats() {
System.out.println(String.format("%d inserts, %d deletes, %d keeps", inserts, deletes, keeps));
}
}
public class CommonsTextExamples {
public static void main(String[] args) {
caseUtilsExample();
stringEscapeUtilsExample();
stringSubstitutorExample();
wordUtilsExample();
diffExample();
translateExample();
similaritiesExample();
sentenceSimilarityExample();
distancesExample();
sentenceDistanceExample();
}
private static void printExampleHeader(String example) {
// Contains an example of TextStringBuilder
String header = "Examples of " + example;
System.out.println("\n" + header);
TextStringBuilder builder = new TextStringBuilder();
System.out.println(builder.appendPadding(header.length(), '-').toString());
}
public static void caseUtilsExample() {
printExampleHeader("CaseUtils");
String string = "java-programming-language";
System.out.println(CaseUtils.toCamelCase(string, true, '-'));
System.out.println(CaseUtils.toCamelCase(string, false, '-'));
}
public static void stringEscapeUtilsExample() {
printExampleHeader("StringEscapeUtils");
String string = "Department, R&D";
System.out.println(StringEscapeUtils.escapeHtml4(string));
System.out.println(StringEscapeUtils.escapeXml11(string));
System.out.println(StringEscapeUtils.escapeCsv(string));
System.out.println(StringEscapeUtils.builder(StringEscapeUtils.ESCAPE_HTML4).append("R&D dept: ")
.escape(string).toString());
}
public static void stringSubstitutorExample() {
printExampleHeader("StringSubstitutor");
Map<String, String> substitutions = new HashMap<>();
substitutions.put("city", "London");
substitutions.put("country", "England");
// With static method
System.out.println(StringSubstitutor.replace("${city} is the capital of ${country}", substitutions));
// With StringSubstitutor object
StringSubstitutor sub = new StringSubstitutor(substitutions);
System.out.println(sub.replace("${city} is the capital of ${country}"));
StringSubstitutor interpolator = StringSubstitutor.createInterpolator();
System.out.println(interpolator.replace("Base64 encoder: ${base64Encoder:Secret password}"));
}
public static void wordUtilsExample() {
printExampleHeader("WordUtils");
String longString = "This is a very long string, from https://www.example.org";
String allLower = "all lower but ONE";
String allCapitalized = "All Capitalized But ONE";
System.out.println("\nWordUtils: Abbreviation");
// Take at least 9 characters, cutting to 12 characters if no space is found before
System.out.println(WordUtils.abbreviate(longString, 9, 12, " ..."));
// Take at least 10 characters, cutting to 12 characters if no space is found before
System.out.println(WordUtils.abbreviate(longString, 10, 12, " ..."));
// Take at least 10 characters, then cut on the first space wherever it is
System.out.println(WordUtils.abbreviate(longString, 10, -1, " ..."));
System.out.println("\nWordUtils: Initials");
System.out.println(WordUtils.initials(allLower));
System.out.println(WordUtils.initials(allCapitalized));
System.out.println("\nWordUtils: Case change");
// Doesn't lowercase the uppercase characters
System.out.println(WordUtils.capitalize(allLower));
// Lowercases everything, then capitalizes the first letter of each word
System.out.println(WordUtils.capitalizeFully(allLower));
// Lowercases the first letter of each word
System.out.println(WordUtils.uncapitalize(allCapitalized));
// Swaps the case of each character
System.out.println(WordUtils.swapCase(allLower));
System.out.println("\nWordUtils: Wrapping");
// Line length is 10, uses '\n' as a line break, does not break words longer than the line
System.out.println(WordUtils.wrap(longString, 10, "\n", false) + "\n");
// Line length is 10, uses '\n' as a line break, breaks words longer than the line
System.out.println(WordUtils.wrap(longString, 10, "\n", true) + "\n");
// Line length is 10, uses '\n' as a line break, breaks words longer than the line, also breaks on commas
System.out.println(WordUtils.wrap(longString, 10, "\n", true, ",") + "\n");
}
public static void diffExample() {
printExampleHeader("diff");
String s1 = "hyperspace";
String s2 = "cyberscape";
StringsComparator comparator = new StringsComparator(s1, s2);
EditScript<Character> script = comparator.getScript();
System.out.println(
"Longest Common Subsequence length (number of \"keep\" commands): " + script.getLCSLength());
System.out.println("Effective modifications (number of \"insert\" and \"delete\" commands): "
+ script.getModifications());
ShowVisitor<Character> visitor = new ShowVisitor<>();
script.visit(visitor);
visitor.printStats();
}
public static void translateExample() {
printExampleHeader("translate");
Map<CharSequence, CharSequence> translation = new HashMap<>();
translation.put("e", "3");
translation.put("l", "1");
translation.put("t", "7");
String s1 = "Let it be!";
LookupTranslator lookupTranslator = new LookupTranslator(translation);
System.out.println(lookupTranslator.translate(s1));
UnicodeEscaper unicodeEscaper = new UnicodeEscaper();
UnicodeUnescaper unicodeUnescaper = new UnicodeUnescaper();
String unicodeString = unicodeEscaper.translate(s1);
System.out.println(unicodeString);
System.out.println(unicodeUnescaper.translate(unicodeString));
}
public static void similaritiesExample() {
printExampleHeader("similarities");
String s1 = "hyperspace";
String s2 = "cyberscape";
JaccardSimilarity jaccard = new JaccardSimilarity();
System.out.println("Jaccard similarity: " + jaccard.apply(s1, s2));
JaroWinklerSimilarity jaroWinkler = new JaroWinklerSimilarity();
System.out.println("Jaro-Winkler similarity: " + jaroWinkler.apply(s1, s2));
LongestCommonSubsequence lcs = new LongestCommonSubsequence();
System.out.println("Longest Common Subsequence similarity: " + lcs.apply(s1, s2));
FuzzyScore fuzzyScore = new FuzzyScore(Locale.ENGLISH);
System.out.println("Fuzzy score similarity: " + fuzzyScore.fuzzyScore(s1, s2));
System.out.println("Fuzzy score similarity: " + fuzzyScore.fuzzyScore(s1, "space"));
}
public static void sentenceSimilarityExample() {
printExampleHeader("sentence similarity");
String s1 = "string similarity";
String s2 = "string distance";
Map<CharSequence, Integer> vector1 = new HashMap<>();
Map<CharSequence, Integer> vector2 = new HashMap<>();
for (String token : s1.split(" ")) {
vector1.put(token, vector1.getOrDefault(token, 0) + 1);
}
for (String token : s2.split(" ")) {
vector2.put(token, vector2.getOrDefault(token, 0) + 1);
}
CosineSimilarity cosine = new CosineSimilarity();
System.out.println("Cosine similarity: " + cosine.cosineSimilarity(vector1, vector2));
// Adding one repetition of "string" to vector2
vector2.put("string", vector2.getOrDefault("string", 0) + 1);
System.out.println("Cosine similarity: " + cosine.cosineSimilarity(vector1, vector2));
}
public static void distancesExample() {
printExampleHeader("distances");
String s1 = "hyperspace";
String s2 = "cyberscape";
HammingDistance hamming = new HammingDistance();
// Requires the two strings to have the same length
System.out.println("Hamming distance: " + hamming.apply(s1, s2));
JaccardDistance jaccard = new JaccardDistance();
System.out.println("Jaccard distance: " + jaccard.apply(s1, s2));
JaroWinklerDistance jaroWinkler = new JaroWinklerDistance();
// The result is wrong at the moment (see https://issues.apache.org/jira/browse/TEXT-104)
System.out.println("Jaro-Winkler distance: " + jaroWinkler.apply(s1, s2));
LongestCommonSubsequenceDistance lcs = new LongestCommonSubsequenceDistance();
System.out.println("Longest Common Subsequence distance: " + lcs.apply(s1, s2));
LevenshteinDistance levenshtein = new LevenshteinDistance();
System.out.println("Levenshtein distance: " + levenshtein.apply(s1, s2));
LevenshteinDistance levenshteinWithThreshold = new LevenshteinDistance(3);
// Returns -1 since the actual distance, 4, is higher than the threshold
System.out.println("Levenshtein distance: " + levenshteinWithThreshold.apply(s1, s2));
LevenshteinDetailedDistance levenshteinDetailed = new LevenshteinDetailedDistance();
System.out.println("Levenshtein detailed distance: " + levenshteinDetailed.apply(s1, s2));
}
public static void sentenceDistanceExample() {
printExampleHeader("sentence distance");
String s1 = "string similarity";
String s2 = "string distance";
CosineDistance cosine = new CosineDistance();
System.out.println("Cosine distance: " + cosine.apply(s1, s2));
System.out.println("Cosine distance: " + cosine.apply(s1, s2 + " string"));
}
}