• 正则表达式


    正则表达式    规则    可以匹配
    A           指定字符    A
    \u548c      指定Unicode字符    和
    .           任意字符    a, b, &, 0, ...
    \d          0-9    0, 1, 2, ..., 9
    \w          a-z, A-Z, 0-9, _    a, A, 0, _, ...
    \s          空格、tab键    “ ”
    \D          非数字    a, A, &, _, ...
    \W          非\w    &, @, 中, ...
    \S          非\s    a, A, &, _, ...
    AB*         任意个数字符    A, AB, ABB, ABBB
    AB+         至少1个字符    AB, ABB, ABBB
    AB?         0个或1个字符    A, AB
    AB{3}       指定个数字符    ABBB
    AB{1,3}     指定范围个数字符    AB, ABB, ABBB
    AB{2,}      至少n个字符    ABB, ABBB, ...
    AB{0,3}     最多n个字符    A, AB, ABB, ABBB

     

    1. // java版本:
    2. public class test {
    3. // 区号(3-4个数字)-电话号码(6-8个数字)
    4. public static boolean isValidTel(String s) {
    5. //return s.matches("\\d{3,4}\\-\\d{6,8}");
    6. //国内区号以0开头
    7. return s.matches("0\\d{2,3}\\-\\d{6,8}");
    8. }
    9. public static void main(String[] args) {
    10. System.out.println(isValidTel("010-123456"));
    11. System.out.println(isValidTel("0123-12345678"));
    12. System.out.println(isValidTel("123-0123456"));
    13. System.out.println("------");
    14. System.out.println(isValidTel("010#12345678"));
    15. System.out.println(isValidTel("010X12345678"));
    16. System.out.println(isValidTel("01-12345678"));
    17. System.out.println(isValidTel("01234-12345678"));
    18. System.out.println(isValidTel("01A-12345678"));
    19. System.out.println(isValidTel("012-1234567A"));
    20. }
    21. }
    22. true
    23. true
    24. false
    25. ------
    26. false
    27. false
    28. false
    29. false
    30. false
    31. false
    1. # python版本:
    2. import re
    3. def isValidTel(str):
    4. # 国内区号以0开头
    5. # 匹配成功 re.match 方法返回一个匹配的对象,否则返回 None
    6. return re.match(r'^0\d{2,3}\-\d{6,8}$', str)
    7. print(isValidTel("010-123456"));
    8. print(isValidTel("0123-12345678"));
    9. print(isValidTel("123-0123456"));
    10. print("------");
    11. print(isValidTel("010#12345678"));
    12. print(isValidTel("010X12345678"));
    13. print(isValidTel("01-12345678"));
    14. print(isValidTel("01234-12345678"));
    15. print(isValidTel("01A-12345678"));
    16. print(isValidTel("012-1234567A"));
    17. # re.match只匹配字符串的开始,如果字符串开始不符合正则表达式,则匹配失败,函数返回None;
    18. # 而re.search匹配整个字符串,直到找到一个匹配
    19. print("------");
    20. print(re.match(r'0\d{2,3}\-\d{6,8}', "1012-1234567A")); # None
    21. print(re.search(r'0\d{2,3}\-\d{6,8}', "1012-1234567A")); # <re.Match object; span=(1, 12), match='012-1234567'>
    22. print("------");
    23. pattern2 = re.compile(r'^0\d{2,3}\-\d{6,8}$')
    24. match2 = pattern2.match("0123-12345678")
    25. if match2:
    26. print(match2.group()) # "0123-12345678"
    27. print(match2.start()) # "0"
    28. print(match2.end()) # "13"
    29. <re.Match object; span=(0, 10), match='010-123456'>
    30. <re.Match object; span=(0, 13), match='0123-12345678'>
    31. None
    32. ------
    33. None
    34. None
    35. None
    36. None
    37. None
    38. None
    39. ------
    40. None
    41. <re.Match object; span=(1, 12), match='012-1234567'>
    42. ------
    43. 0123-12345678
    44. 0
    45. 13

    正则表达式    规则    可以匹配
    ^             开头    字符串开头
    $             结尾    字符串结束
    [ABC]         [...]内任意字符    A, B, C
    [A-F0-9xy]    指定范围的字符    A, ..., F, 0, ..., 9, x, y
    [^A-F]        指定范围外的任意字符    非A, ..., F
    AB|CD         AB或CD    AB, CD
    AB|CD|EFG     AB或CD或EFG    AB, CD, EFG
    1. // java版本:
    2. public class test {
    3. // 区号(3-4个数字)-电话号码(6-8个数字)
    4. public static boolean isValidTel(String s) {
    5. //return s.matches("\\d{3,4}\\-\\d{6,8}");
    6. //国内区号以0开头,电话号码不能以0开头
    7. return s.matches("0\\d{2,3}\\-[1-9]\\d{5,7}");
    8. }
    9. // QQ号码
    10. public static boolean isValidQQ(String s) {
    11. return s.matches("^[1-9]\\d{4,9}$");
    12. }
    13. public static void main(String[] args) {
    14. System.out.println(isValidQQ("10000"));
    15. System.out.println(isValidQQ("99999"));
    16. System.out.println(isValidQQ("1234567890"));
    17. System.out.println("-----");
    18. System.out.println(isValidQQ("00001"));
    19. System.out.println("-----");
    20. System.out.println(isValidTel("010-123455"));
    21. System.out.println(isValidTel("0123-12345678"));
    22. System.out.println("-----");
    23. System.out.println(isValidTel("123-12345678"));
    24. System.out.println(isValidTel("010-023456"));
    25. }
    26. }
    27. true
    28. true
    29. true
    30. -----
    31. false
    32. -----
    33. true
    34. true
    35. -----
    36. false
    37. false
    1. # python版本:
    2. import re
    3. def isValidTel(str):
    4. # 国内区号以0开头
    5. # 匹配成功 re.match 方法返回一个匹配的对象,否则返回 None
    6. return re.match(r'^0\d{2,3}\-[1-9]\d{5,7}$', str)
    7. def isValidQQ(str):
    8. # QQ号码
    9. return re.match(r'^[1-9]\d{4,9}$', str)
    10. print(isValidQQ("10000"));
    11. print(isValidQQ("99999"));
    12. print(isValidQQ("1234567890"));
    13. print("-----");
    14. print(isValidQQ("00001"));
    15. print("-----");
    16. print(isValidTel("010-123455"));
    17. print(isValidTel("0123-12345678"));
    18. print("-----");
    19. print(isValidTel("123-12345678"));
    20. print(isValidTel("010-023456"));
    21. <re.Match object; span=(0, 5), match='10000'>
    22. <re.Match object; span=(0, 5), match='99999'>
    23. <re.Match object; span=(0, 10), match='1234567890'>
    24. -----
    25. None
    26. -----
    27. <re.Match object; span=(0, 10), match='010-123455'>
    28. <re.Match object; span=(0, 13), match='0123-12345678'>
    29. -----
    30. None
    31. None
    • 示例:判断用户输入的年份是否是19xx年:
      • 规则:1 9 0-9 0-9
      • 正则:19\d\d
      • java字符串:"19\\d\\d"
      • java代码:
        1. // java版本:
        2. public class test {
        3. public static boolean is19xx(String s) {
        4. if (s == null){
        5. return false;
        6. }
        7. return s.matches("19\\d\\d");
        8. }
        9. public static void main(String[] args) {
        10. System.out.println(is19xx("1900"));
        11. System.out.println(is19xx("1911"));
        12. System.out.println(is19xx("1999"));
        13. System.out.println("------");
        14. System.out.println(is19xx(null));
        15. System.out.println(is19xx(""));
        16. System.out.println(is19xx("19"));
        17. System.out.println(is19xx("190A"));
        18. System.out.println(is19xx("19001"));
        19. System.out.println(is19xx("1900s"));
        20. System.out.println(is19xx("2900"));
        21. System.out.println(is19xx("A900"));
        22. }
        23. }
      • 运行结果:
        1. true
        2. true
        3. true
        4. ------
        5. false
        6. false
        7. false
        8. false
        9. false
        10. false
        11. false
        12. false
        1. # python版本:
        2. import re
        3. def is19xx(str):
        4. # 判断是否为19xx年;注意该正则没有以$结尾,re.match只从字符串开头匹配,所以"19001"、"1900s"也会匹配成功(与Java的String.matches()整串匹配不同)
        5. # 匹配成功 re.match 方法返回一个匹配的对象,否则返回 None
        6. return re.match(r'19\d\d', str)
        7. print(is19xx("1900"));
        8. print(is19xx("1911"));
        9. print(is19xx("1999"));
        10. print("------");
        11. print(is19xx(""));
        12. print(is19xx("19"));
        13. print(is19xx("190A"));
        14. print(is19xx("19001"));
        15. print(is19xx("1900s"));
        16. print(is19xx("2900"));
        17. print(is19xx("A900"));
        18. print("------");
        19. pattern2 = re.compile(r'^0\d{2,3}\-\d{6,8}$')
        20. match2 = pattern2.match("0123-12345678")
        21. if match2:
        22. print(match2.group()) # "0123-12345678"
        23. print(match2.start()) # "0"
        24. print(match2.end()) # "13"
        25. <re.Match object; span=(0, 4), match='1900'>
        26. <re.Match object; span=(0, 4), match='1911'>
        27. <re.Match object; span=(0, 4), match='1999'>
        28. ------
        29. None
        30. None
        31. None
        32. <re.Match object; span=(0, 4), match='1900'>
        33. <re.Match object; span=(0, 4), match='1900'>
        34. None
        35. None
        36. ------
        37. 0123-12345678
        38. 0
        39. 13

    1. import java.util.regex.Matcher;
    2. import java.util.regex.Pattern;
    3. // java版本:
    4. public class test {
    5. public static void main(String[] args) {
    6. String regex = "^\\d{3,4}\\-\\d{6,8}$";
    7. System.out.println("010-12345678".matches(regex));
    8. System.out.println("------");
    9. // 反复使用一个正则表达式字符串进行快速匹配效率较低
    10. // (因为字符串首先得编译成Pattern对象)
    11. // 反复使用,可以移动到方法外
    12. Pattern pattern = Pattern.compile("^\\d{3,4}\\-\\d{6,8}$");
    13. Matcher matcher = pattern.matcher("010-12345678");
    14. System.out.println(matcher.matches());
    15. System.out.println(pattern.matcher("021-123456").matches());
    16. System.out.println(pattern.matcher("021#123456").matches());
    17. System.out.println("------");
    18. //使用Matcher.group(n)可以快速提取子串
    19. Pattern pattern1 = Pattern.compile("^(\\d{3,4})\\-(\\d{6,8})$");
    20. Matcher matcher1 = pattern1.matcher("010-12345678");
    21. if (matcher1.matches()){
    22. System.out.println(matcher1.group(0)); // "010-12345678",0表示匹配的整个字符串
    23. System.out.println(matcher1.group(1)); // "010", 1表示匹配的第1个子串
    24. System.out.println(matcher1.group(2)); // "12345678", 2表示匹配的第2个子串
    25. }
    26. }
    27. }
    28. true
    29. ------
    30. true
    31. true
    32. false
    33. ------
    34. 010-12345678
    35. 010
    36. 12345678
    1. # python版本:
    2. import re
    3. # re.match 尝试从字符串的起始位置匹配一个模式,如果起始位置匹配不成功,match() 就返回 None
    4. regex = "^\d{3,4}\-\d{6,8}$"
    5. print(re.match(regex, "010-12345678"))
    6. print("----")
    7. print(re.match(regex, "021-123456"))
    8. print(re.match(regex, "021#123456"))
    9. print("----")
    10. # re.compile只是预编译正则表达式;这里之所以要求完全匹配整个字符串,是因为正则以^开头、以$结尾
    11. pattern = re.compile(r'^\d{3,4}\-\d{6,8}$')
    12. print(pattern.match('021-123456A'))
    13. print(pattern.match("021-123456"))
    14. print("----")
    15. matchObj = re.match(r'^(\d{3,4})\-(\d{6,8})$',"010-12345678")
    16. if matchObj:
    17. print("matchObj.group() : ", matchObj.group())
    18. print("matchObj.group(1) : ", matchObj.group(1))
    19. print("matchObj.group(2) : ", matchObj.group(2))
    20. else:
    21. print("No match!!")
    22. <re.Match object; span=(0, 12), match='010-12345678'>
    23. ----
    24. <re.Match object; span=(0, 10), match='021-123456'>
    25. None
    26. ----
    27. None
    28. <re.Match object; span=(0, 10), match='021-123456'>
    29. ----
    30. matchObj.group() : 010-12345678
    31. matchObj.group(1) : 010
    32. matchObj.group(2) : 12345678

    1. import java.util.regex.Matcher;
    2. import java.util.regex.Pattern;
    3. // java版本:
    4. public class test {
    5. public static void main(String[] args) {
    6. // "^(\d+)(0*)$"
    7. // 正则表达式默认是使用贪婪匹配:尽可能多的向后匹配
    8. Pattern pattern1 = Pattern.compile("^(\\d+)(0*)$");
    9. Matcher matcher1 = pattern1.matcher("12300");
    10. if (matcher1.matches()){
    11. System.out.println(matcher1.group(1)); // "12300"
    12. System.out.println(matcher1.group(2)); // ""
    13. }
    14. // "^(\d+?)(0*)$"——实现非贪婪匹配
    15. Pattern pattern2 = Pattern.compile("^(\\d+?)(0*)$");
    16. Matcher matcher2 = pattern2.matcher("12300");
    17. if (matcher2.matches()){
    18. System.out.println(matcher2.group(1)); // "123"
    19. System.out.println(matcher2.group(2)); // "00"
    20. }
    21. // 区分?是0个或者1个还是非贪婪匹配
    22. // "^(\\d??)(0*)$":第一个?是0个或者1个,第2个?是非贪婪匹配
    23. Pattern pattern3 = Pattern.compile("^(\\d??)(0*)$");
    24. Matcher matcher3 = pattern3.matcher("12300");
    25. if (matcher3.matches()){
    26. System.out.println(matcher3.group(1)); // 注意:\d??最多只匹配1个数字,"12300"整体无法匹配,该分支不会执行
    27. System.out.println(matcher3.group(2)); // 若输入改为"0000",则group(1)为"",group(2)为"0000"
    28. }
    29. }
    30. }

     

    1. # python版本:
    2. import re
    3. pattern1 = re.compile(r'^(\d+)(0*)$')
    4. match1 = pattern1.match("12300")
    5. if match1:
    6. print(match1.group(1)) # "12300"
    7. print(match1.group(2)) # ""
    8. # "^(\d+?)(0*)$"——实现非贪婪匹配
    9. pattern2 = re.compile(r'^(\d+?)(0*)$')
    10. match2 = pattern2.match("12300")
    11. if match2:
    12. print(match2.group(1)) # "123"
    13. print(match2.group(2)) # "00"
    14. 12300
    15. 123
    16. 00
    1. import java.util.Arrays;
    2. import java.util.regex.Matcher;
    3. import java.util.regex.Pattern;
    4. // java版本:
    5. public class test {
    6. public static void main(String[] args) {
    7. // 使用正则表达式分割字符串:
    8. // String[] String.split(String regex)
    9. System.out.println(Arrays.toString("a b c".split("\\s"))); // ["a", "b", "c"]
    10. System.out.println(Arrays.toString("a b  c".split("\\s"))); // ["a", "b", "", "c"]
    11. System.out.println(Arrays.toString("a b  c".split("\\s+"))); // ["a", "b", "c"]
    12. System.out.println(Arrays.toString("a, b ;; c".split("[\\,\\;\\s]+"))); // ["a", "b", "c"]
    13. System.out.println("----");
    14. // 使用正则表达式搜索字符串:
    15. // Matcher.find()
    16. // Pattern.CASE_INSENSITIVE:忽略大小写
    17. String s = "The quick brown fox jumps over the lazy dug.";
    18. // Pattern p = Pattern.compile("the", Pattern.CASE_INSENSITIVE);
    19. // Pattern p = Pattern.compile("\\w+", Pattern.CASE_INSENSITIVE); // 提取出每个单词
    20. Pattern p = Pattern.compile("\\w*o\\w*", Pattern.CASE_INSENSITIVE); // 提取出单词中带有o的单词
    21. Matcher m = p.matcher(s);
    22. while (m.find()){
    23. String sub = s.substring(m.start(), m. end());
    24. System.out.println(sub+", start="+m.start()+", end="+m.end());
    25. }
    26. System.out.println("-----");
    27. // 使用正则表达式替换字符串:
    28. // String.replaceAll()
    29. String s1 = "The quick brown fox jumps over the lazy dug.";
    30. System.out.println(s1.replaceAll("\\s+"," "));
    31. // 这里$1会捕获前面的分组"(\\w+)"
    32. // The quick brown fox jumps over the lazy dug.
    33. System.out.println(s1.replaceAll("\\s+"," ").replaceAll("(\\w+)", "$1"));
    34. }
    35. }
    36. [a, b, c]
    37. [a, b, , c]
    38. [a, b, c]
    39. [a, b, c]
    40. ----
    41. brown, start=10, end=15
    42. fox, start=16, end=19
    43. over, start=26, end=30
    44. -----
    45. The quick brown fox jumps over the lazy dug.
    46. The quick brown fox jumps over the lazy dug.
    1. # python版本:
    2. import re
    3. # 使用正则表达式分割字符串:
    4. print(re.split(r'\s',("a b c"))) # ["a", "b", "c"]
    5. print(re.split(r'\s',("a b  c"))) # ["a", "b", "", "c"]
    6. print(re.split(r'\s+',("a b  c"))) # ["a", "b", "c"]
    7. print(re.split(r'[\,\;\s]+',("a, b c"))) # ["a", "b", "c"]
    8. print("----")
    9. # 使用正则表达式搜索字符串:
    10. s = "The quick brown fox jumps over the lazy dug."
    11. p = re.compile(r'\w*o\w*', re.I) # 提取出单词中带有o的单词
    12. m = p.findall(s) #['brown', 'fox', 'over']
    13. print(m)
    14. it = re.finditer(r'\w*o\w*',s, re.I)
    15. for match in it:
    16. print(match.group())
    17. print("-----")
    18. # 使用正则表达式替换字符串:
    19. s1 = "The quick brown fox jumps over the lazy dug."
    20. s2 = re.sub(r'\s+',' ',s1)
    21. print(s2)
    22. # 这里\g<1>引用前面的捕获分组(\w+)(相当于Java中的$1)
    23. # The quick brown fox jumps over the lazy dug.
    24. print(re.sub(r'(\w+)','\g<1>',s2))
    25. ['a', 'b', 'c']
    26. ['a', 'b', '', 'c']
    27. ['a', 'b', 'c']
    28. ['a', 'b', 'c']
    29. ----
    30. ['brown', 'fox', 'over']
    31. brown
    32. fox
    33. over
    34. -----
    35. The quick brown fox jumps over the lazy dug.
    36. The quick brown fox jumps over the lazy dug.

     

  • 相关阅读:
    C#下WinForm多语种切换
    generate by chatgpt:应用上线前的checkList(部分是我自己的回答)
    数据库知识点整合
    leetcode做题笔记143. 重排链表
    linux核心知识梳理
    “2024国际数字能源展”推动绿色低碳发展,助力实现“双碳”目标
    前缀树的设计与实现
    聚类分析字符串数组
    基于springboot+vue的戒毒所人员管理系统毕业设计源码251514
    Vue--keep-alive--使用/实例
  • 原文地址:https://blog.csdn.net/sb_jb/article/details/128081285