• Apache Commons Text LevenshteinDistance 莱文斯坦(编辑距离)算法


    Apache Commons Text LevenshteinDistance 的构造函数 LevenshteinDistance(final Integer threshold):如果阈值不为 null,则距离计算将以该阈值为上限。

    介绍

    如果阈值不为 null,则距离计算将以该阈值为上限;当实际距离超过阈值时,apply 方法返回 -1。

    如果阈值为 null,则使用算法的无限制版本。

    LevenshteinDistance() 方法是一个构造函数

    语法

    LevenshteinDistance 类的构造函数 LevenshteinDistance() 声明如下:

    复制

    public LevenshteinDistance(final Integer threshold)
    
    

    参数

    LevenshteinDistance() 方法具有以下参数:

    • Integer threshold - 如果此值为 null,则距离计算不受限制。该值不能为负数。

    以下代码演示如何使用 Apache Commons Text 的 LevenshteinDistance(final Integer threshold) 构造函数(见 distancesExample 方法)。

    例 1

    复制

    import org.apache.commons.text.*;
    import org.apache.commons.text.diff.*;
    import org.apache.commons.text.similarity.*;
    import org.apache.commons.text.translate.*;
    
    import java.util.HashMap;
    import java.util.Locale;
    import java.util.Map;
    
    /**
     * {@link CommandVisitor} that prints every edit command it visits and keeps a
     * running count of the insert, keep and delete commands seen.
     *
     * <p>The type parameter is named {@code T} on purpose: naming it
     * {@code Character} would shadow {@code java.lang.Character} inside this class
     * and make the element type look fixed when it is not.
     *
     * @param <T> type of the elements carried by the visited commands
     */
    class ShowVisitor<T> implements CommandVisitor<T> {
        /** Number of insert commands visited so far. */
        private int inserts = 0;
        /** Number of keep commands visited so far. */
        private int keeps = 0;
        /** Number of delete commands visited so far. */
        private int deletes = 0;

        /**
         * Counts an insert command and prints the inserted element.
         *
         * @param element element carried by the insert command
         */
        public void visitInsertCommand(T element) {
            ++inserts;
            System.out.println(String.format("insert %s", element));
        }

        /**
         * Counts a keep command and prints the kept element.
         *
         * @param element element carried by the keep command
         */
        public void visitKeepCommand(T element) {
            ++keeps;
            System.out.println(String.format("keep   %s", element));
        }

        /**
         * Counts a delete command and prints the deleted element.
         *
         * @param element element carried by the delete command
         */
        public void visitDeleteCommand(T element) {
            ++deletes;
            System.out.println(String.format("delete %s", element));
        }

        /**
         * Prints the totals gathered so far, in the order inserts, deletes, keeps.
         */
        public void printStats() {
            System.out.println(String.format("%d inserts, %d deletes, %d keeps", inserts, deletes, keeps));
        }
    }
    
    /**
     * Console walkthrough of several Apache Commons Text features: case conversion,
     * escaping, substitution, word utilities, diffing, translation, and the
     * similarity and distance measures (including {@code LevenshteinDistance} with
     * and without a threshold).
     */
    public class CommonsTextExamples {
        /**
         * Runs every example in a fixed order, each one printing to standard output.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            // String helpers
            caseUtilsExample();
            stringEscapeUtilsExample();
            stringSubstitutorExample();
            wordUtilsExample();

            // Diff and translate packages
            diffExample();
            translateExample();

            // Similarity and distance measures
            similaritiesExample();
            sentenceSimilarityExample();
            distancesExample();
            sentenceDistanceExample();
        }

        /**
         * Prints a blank line, the title "Examples of &lt;example&gt;", and a row of
         * dashes as long as the title. The dash row doubles as a small
         * {@code TextStringBuilder} demonstration.
         *
         * @param example name of the feature about to be demonstrated
         */
        private static void printExampleHeader(String example) {
            final String title = "Examples of " + example;
            System.out.println("\n" + title);

            final TextStringBuilder underline = new TextStringBuilder();
            underline.appendPadding(title.length(), '-');
            System.out.println(underline.toString());
        }

        /**
         * Counts how often each space-separated token occurs in a sentence.
         *
         * @param sentence text to split on single spaces
         * @return map from token to its number of occurrences
         */
        private static Map<CharSequence, Integer> countTokens(String sentence) {
            final Map<CharSequence, Integer> counts = new HashMap<>();
            for (String token : sentence.split(" ")) {
                counts.merge(token, 1, Integer::sum);
            }
            return counts;
        }

        /**
         * Converts a dash-separated string to camel case, first with and then
         * without a capitalized first letter.
         */
        public static void caseUtilsExample() {
            printExampleHeader("CaseUtils");

            final String dashed = "java-programming-language";

            for (boolean capitalizeFirstLetter : new boolean[] {true, false}) {
                System.out.println(CaseUtils.toCamelCase(dashed, capitalizeFirstLetter, '-'));
            }
        }

        /**
         * Escapes one string for HTML 4, XML 1.1 and CSV, then builds a mixed
         * raw/escaped string with the {@code StringEscapeUtils} builder.
         */
        public static void stringEscapeUtilsExample() {
            printExampleHeader("StringEscapeUtils");

            final String raw = "Department, R&D";

            final String html = StringEscapeUtils.escapeHtml4(raw);
            System.out.println(html);
            final String xml = StringEscapeUtils.escapeXml11(raw);
            System.out.println(xml);
            final String csv = StringEscapeUtils.escapeCsv(raw);
            System.out.println(csv);

            // append() adds text as-is, escape() runs it through the HTML 4 translator first
            final String mixed = StringEscapeUtils.builder(StringEscapeUtils.ESCAPE_HTML4)
                    .append("R&D dept: ")
                    .escape(raw)
                    .toString();
            System.out.println(mixed);
        }

        /**
         * Replaces {@code ${...}} variables from a map, via the static helper and
         * via an instance, then uses the interpolator for a Base64 lookup.
         */
        public static void stringSubstitutorExample() {
            printExampleHeader("StringSubstitutor");

            final Map<String, String> values = new HashMap<>();
            values.put("city", "London");
            values.put("country", "England");

            final String template = "${city} is the capital of ${country}";

            // One-shot static call
            System.out.println(StringSubstitutor.replace(template, values));

            // Reusable substitutor instance
            final StringSubstitutor substitutor = new StringSubstitutor(values);
            System.out.println(substitutor.replace(template));

            // Interpolator created by the library's factory method
            final StringSubstitutor lookups = StringSubstitutor.createInterpolator();
            System.out.println(lookups.replace("Base64 encoder: ${base64Encoder:Secret password}"));
        }

        /**
         * Demonstrates {@code WordUtils}: abbreviation, initials, case changes and
         * line wrapping.
         */
        public static void wordUtilsExample() {
            printExampleHeader("WordUtils");

            final String sentence = "This is a very long string, from https://www.example.org";
            final String mostlyLower = "all lower but ONE";
            final String mostlyCapitalized = "All Capitalized But ONE";
            final String ellipsis = " ...";
            final String lineBreak = "\n";

            System.out.println("\nWordUtils: Abbreviation");
            // Lower limit 9, upper limit 12
            System.out.println(WordUtils.abbreviate(sentence, 9, 12, ellipsis));
            // Lower limit 10, upper limit 12
            System.out.println(WordUtils.abbreviate(sentence, 10, 12, ellipsis));
            // Lower limit 10, upper limit -1 (no upper limit): cut at the first space after the lower limit
            System.out.println(WordUtils.abbreviate(sentence, 10, -1, ellipsis));

            System.out.println("\nWordUtils: Initials");
            System.out.println(WordUtils.initials(mostlyLower));
            System.out.println(WordUtils.initials(mostlyCapitalized));

            System.out.println("\nWordUtils: Case change");
            // Upper-cases first letters only; existing capitals are left alone
            System.out.println(WordUtils.capitalize(mostlyLower));
            // Lower-cases the whole string before capitalizing each word
            System.out.println(WordUtils.capitalizeFully(mostlyLower));
            // Lower-cases the first letter of every word
            System.out.println(WordUtils.uncapitalize(mostlyCapitalized));
            // Inverts the case of every character
            System.out.println(WordUtils.swapCase(mostlyLower));

            System.out.println("\nWordUtils: Wrapping");
            // Width 10, '\n' separator, over-long words kept whole
            System.out.println(WordUtils.wrap(sentence, 10, lineBreak, false) + lineBreak);

            // Width 10, '\n' separator, over-long words split
            System.out.println(WordUtils.wrap(sentence, 10, lineBreak, true) + lineBreak);

            // Same as above, but wrapping on commas
            System.out.println(WordUtils.wrap(sentence, 10, lineBreak, true, ",") + lineBreak);
        }

        /**
         * Computes the edit script between two words, prints its summary figures,
         * then replays it through a {@code ShowVisitor}.
         */
        public static void diffExample() {
            printExampleHeader("diff");

            final String left = "hyperspace";
            final String right = "cyberscape";

            final EditScript<Character> editScript = new StringsComparator(left, right).getScript();

            System.out.println(
                    "Longest Common Subsequence length (number of \"keep\" commands): "
                            + editScript.getLCSLength());
            System.out.println(
                    "Effective modifications (number of \"insert\" and \"delete\" commands): "
                            + editScript.getModifications());

            final ShowVisitor<Character> printer = new ShowVisitor<>();
            editScript.visit(printer);
            printer.printStats();
        }

        /**
         * Translates a sentence with a lookup table, then escapes it with
         * {@code UnicodeEscaper} and unescapes the result again.
         */
        public static void translateExample() {
            printExampleHeader("translate");

            final Map<CharSequence, CharSequence> replacements = new HashMap<>();
            replacements.put("e", "3");
            replacements.put("l", "1");
            replacements.put("t", "7");

            final String sentence = "Let it be!";

            final LookupTranslator leet = new LookupTranslator(replacements);
            System.out.println(leet.translate(sentence));

            final UnicodeEscaper escaper = new UnicodeEscaper();
            final UnicodeUnescaper unescaper = new UnicodeUnescaper();

            final String escaped = escaper.translate(sentence);
            System.out.println(escaped);
            System.out.println(unescaper.translate(escaped));
        }

        /**
         * Prints the Jaccard, Jaro-Winkler, longest-common-subsequence and fuzzy
         * score similarities of two words.
         */
        public static void similaritiesExample() {
            printExampleHeader("similarities");

            final String left = "hyperspace";
            final String right = "cyberscape";

            final JaccardSimilarity jaccardSimilarity = new JaccardSimilarity();
            System.out.println("Jaccard similarity: " + jaccardSimilarity.apply(left, right));

            final JaroWinklerSimilarity jaroWinklerSimilarity = new JaroWinklerSimilarity();
            System.out.println("Jaro-Winkler similarity: " + jaroWinklerSimilarity.apply(left, right));

            final LongestCommonSubsequence commonSubsequence = new LongestCommonSubsequence();
            System.out.println("Longest Common Subsequence similarity: " + commonSubsequence.apply(left, right));

            final FuzzyScore fuzzy = new FuzzyScore(Locale.ENGLISH);
            System.out.println("Fuzzy score similarity: " + fuzzy.fuzzyScore(left, right));
            System.out.println("Fuzzy score similarity: " + fuzzy.fuzzyScore(left, "space"));
        }

        /**
         * Prints the cosine similarity of two sentences from their token counts,
         * then again after counting "string" one more time in the second sentence.
         */
        public static void sentenceSimilarityExample() {
            printExampleHeader("sentence similarity");

            final Map<CharSequence, Integer> firstCounts = countTokens("string similarity");
            final Map<CharSequence, Integer> secondCounts = countTokens("string distance");

            final CosineSimilarity cosineSimilarity = new CosineSimilarity();
            System.out.println("Cosine similarity: " + cosineSimilarity.cosineSimilarity(firstCounts, secondCounts));

            // Count "string" once more in the second vector
            secondCounts.merge("string", 1, Integer::sum);
            System.out.println("Cosine similarity: " + cosineSimilarity.cosineSimilarity(firstCounts, secondCounts));
        }

        /**
         * Prints several edit distances between two words, including Levenshtein
         * without a threshold, with a threshold of 3, and in its detailed form.
         */
        public static void distancesExample() {
            printExampleHeader("distances");

            final String left = "hyperspace";
            final String right = "cyberscape";

            // Hamming distance is only defined for strings of equal length
            final HammingDistance hammingDistance = new HammingDistance();
            System.out.println("Hamming distance: " + hammingDistance.apply(left, right));

            final JaccardDistance jaccardDistance = new JaccardDistance();
            System.out.println("Jaccard distance: " + jaccardDistance.apply(left, right));

            // The result is wrong at the moment (see https://issues.apache.org/jira/browse/TEXT-104)
            final JaroWinklerDistance jaroWinklerDistance = new JaroWinklerDistance();
            System.out.println("Jaro-Winkler distance: " + jaroWinklerDistance.apply(left, right));

            final LongestCommonSubsequenceDistance subsequenceDistance = new LongestCommonSubsequenceDistance();
            System.out.println("Longest Common Subsequence distance: " + subsequenceDistance.apply(left, right));

            final LevenshteinDistance unbounded = new LevenshteinDistance();
            System.out.println("Levenshtein distance: " + unbounded.apply(left, right));

            // The real distance is 4, above the threshold of 3, so apply() yields -1
            final LevenshteinDistance bounded = new LevenshteinDistance(3);
            System.out.println("Levenshtein distance: " + bounded.apply(left, right));

            final LevenshteinDetailedDistance detailed = new LevenshteinDetailedDistance();
            System.out.println("Levenshtein detailed distance: " + detailed.apply(left, right));
        }

        /**
         * Prints the cosine distance between two sentences, then again with
         * " string" appended to the second sentence.
         */
        public static void sentenceDistanceExample() {
            printExampleHeader("sentence distance");

            final String first = "string similarity";
            final String second = "string distance";

            final CosineDistance cosineDistance = new CosineDistance();
            System.out.println("Cosine distance: " + cosineDistance.apply(first, second));
            System.out.println("Cosine distance: " + cosineDistance.apply(first, second + " string"));
        }
    }
  • 相关阅读:
    解锁新技能《Redis SETBIT用法》
    伪装目标检测与分割研究进展
    Flask Web——Jinjia2模板的使用
    数据库大咖,带你深入高性能MySQL架构系统,值得一看
    VOC数据集介绍
    【无标题】
    工程师每日刷题-7
    整合JVM-SANDBOX与VMTOOL,实现支持OGNL的增强自定义MOCK
    OpenCV入门7——OpenCV中的滤波器(包括低通滤波与高通滤波,其中低通滤波用于降噪,而高通滤波用于边缘检测)
    2022年湖北省能源领域首台(套)重大技术装备申报条件、流程及有关要求
  • 原文地址:https://blog.csdn.net/allway2/article/details/127883630