• Apache Commons LevenshteinDistance莱文斯坦(相似度)算法


    Apache Commons LevenshteinDistance 的构造函数 LevenshteinDistance(final Integer threshold):如果阈值不为 null,则距离计算将被限制在该最大长度之内。

    介绍

    如果阈值不为 null,则距离计算将被限制在该最大长度之内。

    如果阈值为 null,则使用该算法的无限制版本。

    LevenshteinDistance() 方法是一个构造函数

    语法

    LevenshteinDistance 类的构造函数 LevenshteinDistance() 声明如下:

    复制

    public LevenshteinDistance(final Integer threshold)
    
    

    参数

    LevenshteinDistance() 方法具有以下参数:

    • Integer threshold - 如果此值为 null,则距离计算将不受限制。该值不能为负数。

    以下代码演示如何使用 Apache Commons LevenshteinDistance 的构造函数 LevenshteinDistance(final Integer threshold)

    例 1

    复制

    import org.apache.commons.text.*;
    import org.apache.commons.text.diff.*;
    import org.apache.commons.text.similarity.*;
    import org.apache.commons.text.translate.*;
    
    import java.util.HashMap;
    import java.util.Locale;
    import java.util.Map;
    
    class ShowVisitor<Character> implements CommandVisitor<Character> {
        private int inserts = 0;
        private int keeps = 0;
        private int deletes = 0;
    
        public void visitInsertCommand(Character character) {
            ++inserts;//   w w   w  .  d e  m  o   2 s    .  c o  m 
            System.out.println(String.format("insert %s", character));
        }
    
        public void visitKeepCommand(Character character) {
            ++keeps;
            System.out.println(String.format("keep   %s", character));
        }
    
        public void visitDeleteCommand(Character character) {
            ++deletes;
            System.out.println(String.format("delete %s", character));
        }
    
        public void printStats() {
            System.out.println(String.format("%d inserts, %d deletes, %d keeps", inserts, deletes, keeps));
        }
    }
    
    public class CommonsTextExamples {
        /**
         * Runs every example in a fixed order: text utilities first, then diff and
         * translate, and finally the similarity and distance measures.
         *
         * @param args command-line arguments; not used
         */
        public static void main(String[] args) {
            final Runnable[] examples = {
                // Text utilities
                CommonsTextExamples::caseUtilsExample,
                CommonsTextExamples::stringEscapeUtilsExample,
                CommonsTextExamples::stringSubstitutorExample,
                CommonsTextExamples::wordUtilsExample,
                // Diff and translate packages
                CommonsTextExamples::diffExample,
                CommonsTextExamples::translateExample,
                // Similarity and distance measures
                CommonsTextExamples::similaritiesExample,
                CommonsTextExamples::sentenceSimilarityExample,
                CommonsTextExamples::distancesExample,
                CommonsTextExamples::sentenceDistanceExample,
            };

            for (Runnable example : examples) {
                example.run();
            }
        }
    
        /**
         * Prints a blank line, the title {@code "Examples of <example>"} and, on the next
         * line, a run of dashes as long as the title.
         *
         * @param example name of the feature being demonstrated
         */
        private static void printExampleHeader(String example) {
            final String title = "Examples of " + example;
            System.out.println("\n" + title);

            // The underline doubles as a small TextStringBuilder demo.
            final String underline = new TextStringBuilder().appendPadding(title.length(), '-').toString();
            System.out.println(underline);
        }
    
        /**
         * Prints the camel-case conversion of a dash-separated string twice: first with
         * the boolean flag of {@code CaseUtils.toCamelCase} set to {@code true}, then
         * with it set to {@code false}.
         */
        public static void caseUtilsExample() {
            printExampleHeader("CaseUtils");

            final String dashed = "java-programming-language";

            for (boolean flag : new boolean[] {true, false}) {
                System.out.println(CaseUtils.toCamelCase(dashed, flag, '-'));
            }
        }
    
        /**
         * Prints the same input escaped for HTML 4, XML 1.1 and CSV, followed by a string
         * assembled with the {@code StringEscapeUtils} builder that mixes a literal
         * prefix with an escaped part.
         */
        public static void stringEscapeUtilsExample() {
            printExampleHeader("StringEscapeUtils");

            final String raw = "Department, R&D";

            final String asHtml = StringEscapeUtils.escapeHtml4(raw);
            System.out.println(asHtml);

            final String asXml = StringEscapeUtils.escapeXml11(raw);
            System.out.println(asXml);

            final String asCsv = StringEscapeUtils.escapeCsv(raw);
            System.out.println(asCsv);

            // Builder: the prefix goes through append(), only the input goes through escape().
            final String mixed = StringEscapeUtils.builder(StringEscapeUtils.ESCAPE_HTML4)
                    .append("R&D dept: ")
                    .escape(raw)
                    .toString();
            System.out.println(mixed);
        }
    
        /**
         * Prints one template resolved through the static {@code StringSubstitutor.replace}
         * and through a {@code StringSubstitutor} instance, then prints a string resolved
         * by the interpolator returned from {@code StringSubstitutor.createInterpolator()}.
         */
        public static void stringSubstitutorExample() {
            printExampleHeader("StringSubstitutor");

            final String template = "${city} is the capital of ${country}";

            final Map<String, String> values = new HashMap<>();
            values.put("city", "London");
            values.put("country", "England");

            // Variant 1: one-shot static call.
            final String viaStatic = StringSubstitutor.replace(template, values);
            System.out.println(viaStatic);

            // Variant 2: substitutor object built from the map.
            final StringSubstitutor substitutor = new StringSubstitutor(values);
            final String viaInstance = substitutor.replace(template);
            System.out.println(viaInstance);

            // Variant 3: interpolator created without a value map.
            final StringSubstitutor lookupBased = StringSubstitutor.createInterpolator();
            final String encoded = lookupBased.replace("Base64 encoder: ${base64Encoder:Secret password}");
            System.out.println(encoded);
        }
    
        /**
         * Walks through four groups of {@code WordUtils} calls and prints each result:
         * abbreviation, initials, case changes and line wrapping.
         */
        public static void wordUtilsExample() {
            printExampleHeader("WordUtils");

            final String sentence = "This is a very long string, from https://www.example.org";
            final String mostlyLower = "all lower but ONE";
            final String mostlyCapitalized = "All Capitalized But ONE";

            final String ellipsis = " ...";
            final String lineBreak = "\n";
            final int lineLength = 10;

            // --- Abbreviation ---
            System.out.println("\nWordUtils: Abbreviation");
            // At least 9 characters; cut at 12 if no space shows up earlier
            final String shortCut = WordUtils.abbreviate(sentence, 9, 12, ellipsis);
            System.out.println(shortCut);
            // At least 10 characters; cut at 12 if no space shows up earlier
            final String boundedCut = WordUtils.abbreviate(sentence, 10, 12, ellipsis);
            System.out.println(boundedCut);
            // At least 10 characters; then cut at the first space, wherever it is
            final String unboundedCut = WordUtils.abbreviate(sentence, 10, -1, ellipsis);
            System.out.println(unboundedCut);

            // --- Initials ---
            System.out.println("\nWordUtils: Initials");
            for (String phrase : new String[] {mostlyLower, mostlyCapitalized}) {
                System.out.println(WordUtils.initials(phrase));
            }

            // --- Case changes ---
            System.out.println("\nWordUtils: Case change");
            // Upper-case characters already present are left untouched
            final String capitalized = WordUtils.capitalize(mostlyLower);
            System.out.println(capitalized);
            // Everything is lower-cased first, then each word's first letter is capitalized
            final String fullyCapitalized = WordUtils.capitalizeFully(mostlyLower);
            System.out.println(fullyCapitalized);
            // Each word's first letter is lower-cased
            final String uncapitalized = WordUtils.uncapitalize(mostlyCapitalized);
            System.out.println(uncapitalized);
            // Every character has its case flipped
            final String swapped = WordUtils.swapCase(mostlyLower);
            System.out.println(swapped);

            // --- Wrapping (line length 10, '\n' as the line break) ---
            System.out.println("\nWordUtils: Wrapping");
            // Words longer than the line are kept whole
            final String keepLongWords = WordUtils.wrap(sentence, lineLength, lineBreak, false);
            System.out.println(keepLongWords + lineBreak);

            // Words longer than the line are broken
            final String breakLongWords = WordUtils.wrap(sentence, lineLength, lineBreak, true);
            System.out.println(breakLongWords + lineBreak);

            // Words longer than the line are broken, and commas are break points too
            final String breakOnCommas = WordUtils.wrap(sentence, lineLength, lineBreak, true, ",");
            System.out.println(breakOnCommas + lineBreak);
        }
    
        /**
         * Builds the edit script between two fixed strings, prints its LCS length and
         * modification count, then replays the script through a {@code ShowVisitor} and
         * prints the visitor's statistics.
         */
        public static void diffExample() {
            printExampleHeader("diff");

            final String before = "hyperspace";
            final String after = "cyberscape";

            final EditScript<Character> editScript = new StringsComparator(before, after).getScript();

            final String lcsLine = "Longest Common Subsequence length (number of \"keep\" commands): "
                    + editScript.getLCSLength();
            System.out.println(lcsLine);

            final String modificationsLine = "Effective modifications (number of \"insert\" and \"delete\" commands): "
                    + editScript.getModifications();
            System.out.println(modificationsLine);

            // Replay the script: the visitor prints each command as it is visited.
            final ShowVisitor<Character> tally = new ShowVisitor<>();
            editScript.visit(tally);
            tally.printStats();
        }
    
        /**
         * Prints a fixed sentence run through a {@code LookupTranslator} built from a
         * three-entry map, then prints the sentence after {@code UnicodeEscaper} and
         * again after passing that result through {@code UnicodeUnescaper}.
         */
        public static void translateExample() {
            printExampleHeader("translate");

            final Map<CharSequence, CharSequence> replacements = new HashMap<>();
            replacements.put("e", "3");
            replacements.put("l", "1");
            replacements.put("t", "7");

            final String sentence = "Let it be!";

            final String replaced = new LookupTranslator(replacements).translate(sentence);
            System.out.println(replaced);

            final UnicodeEscaper escaper = new UnicodeEscaper();
            final UnicodeUnescaper unescaper = new UnicodeUnescaper();

            // Round trip: escape first, then feed the escaped form to the unescaper.
            final String escaped = escaper.translate(sentence);
            System.out.println(escaped);
            final String roundTripped = unescaper.translate(escaped);
            System.out.println(roundTripped);
        }
    
        /**
         * Prints the Jaccard, Jaro-Winkler and Longest Common Subsequence results for one
         * pair of words, followed by two {@code FuzzyScore} results (the same pair, then
         * the first word against {@code "space"}).
         */
        public static void similaritiesExample() {
            printExampleHeader("similarities");

            final String left = "hyperspace";
            final String right = "cyberscape";

            System.out.println("Jaccard similarity: " + new JaccardSimilarity().apply(left, right));

            System.out.println("Jaro-Winkler similarity: " + new JaroWinklerSimilarity().apply(left, right));

            System.out.println(
                    "Longest Common Subsequence similarity: " + new LongestCommonSubsequence().apply(left, right));

            // One scorer is reused for both fuzzy comparisons.
            final FuzzyScore scorer = new FuzzyScore(Locale.ENGLISH);
            for (String query : new String[] {right, "space"}) {
                System.out.println("Fuzzy score similarity: " + scorer.fuzzyScore(left, query));
            }
        }
    
        /**
         * Splits two sentences on single spaces, counts the occurrences of each token,
         * and prints the cosine similarity of the two count maps. It then adds one more
         * occurrence of {@code "string"} to the second map and prints the similarity again.
         */
        public static void sentenceSimilarityExample() {
            printExampleHeader("sentence similarity");

            final String first = "string similarity";
            final String second = "string distance";

            final Map<CharSequence, Integer> firstCounts = new HashMap<>();
            final Map<CharSequence, Integer> secondCounts = new HashMap<>();

            // merge() starts a token at 1 or adds 1 to its existing count.
            for (String word : first.split(" ")) {
                firstCounts.merge(word, 1, Integer::sum);
            }
            for (String word : second.split(" ")) {
                secondCounts.merge(word, 1, Integer::sum);
            }

            final CosineSimilarity similarity = new CosineSimilarity();
            System.out.println("Cosine similarity: " + similarity.cosineSimilarity(firstCounts, secondCounts));

            // Count "string" one more time in the second map and measure again
            secondCounts.merge("string", 1, Integer::sum);
            System.out.println("Cosine similarity: " + similarity.cosineSimilarity(firstCounts, secondCounts));
        }
    
        /**
         * Prints the result of several edit-distance classes applied to one pair of
         * words: Hamming, Jaccard, Jaro-Winkler, Longest Common Subsequence, Levenshtein
         * (without and with a threshold of 3) and the detailed Levenshtein variant.
         */
        public static void distancesExample() {
            printExampleHeader("distances");

            final String left = "hyperspace";
            final String right = "cyberscape";

            // Hamming needs both strings to be the same length
            System.out.println("Hamming distance: " + new HammingDistance().apply(left, right));

            System.out.println("Jaccard distance: " + new JaccardDistance().apply(left, right));

            // The result is wrong at the moment (see https://issues.apache.org/jira/browse/TEXT-104)
            System.out.println("Jaro-Winkler distance: " + new JaroWinklerDistance().apply(left, right));

            System.out.println(
                    "Longest Common Subsequence distance: " + new LongestCommonSubsequenceDistance().apply(left, right));

            final LevenshteinDistance unbounded = new LevenshteinDistance();
            System.out.println("Levenshtein distance: " + unbounded.apply(left, right));

            // With a threshold of 3 this returns -1, because the actual distance, 4, exceeds it
            final LevenshteinDistance bounded = new LevenshteinDistance(3);
            System.out.println("Levenshtein distance: " + bounded.apply(left, right));

            final LevenshteinDetailedDistance detailed = new LevenshteinDetailedDistance();
            System.out.println("Levenshtein detailed distance: " + detailed.apply(left, right));
        }
    
        /**
         * Prints the {@code CosineDistance} between two sentences, then prints it again
         * with {@code " string"} appended to the second sentence.
         */
        public static void sentenceDistanceExample() {
            printExampleHeader("sentence distance");

            final String reference = "string similarity";
            final String candidate = "string distance";

            final CosineDistance distance = new CosineDistance();
            for (String other : new String[] {candidate, candidate + " string"}) {
                System.out.println("Cosine distance: " + distance.apply(reference, other));
            }
        }
    }
  • 相关阅读:
    第7/100天 阅读笔记
    WPS字母上方打出横杠(-)或尖角(^)
    看了这篇文章CAP你还不懂,我打你!
    计算机毕业设计Java家电产品售后(源码+系统+mysql数据库+lw文档)
    显著性目标检测(一)——与图像分割、目标检测的区别
    【Java面试】5年去某团面试竟遇到这么简单的题?如何破坏双亲委派模型?看看高手是如何回答的
    用户忠诚度衡量指标丨利用净推荐值减少流失
    初始Vue3.0+TypeScript
    人工神经网络的典型模型,人工神经网络模型定义
    商城免费搭建之java商城 开源java电子商务Spring Cloud+Spring Boot+mybatis+MQ+VR全景+b2b2c
  • 原文地址:https://blog.csdn.net/allway2/article/details/127883630