目录
5、写一个正则表达式,使其能同时识别下面所有的字符串:'bat', 'bit', 'but', 'hat', 'hit', 'hut'
- >>> import re
- >>>
- >>> s = "abc123 90years $hello world!"
- >>> p = re.compile(r"^[a-zA-Z]+",re.M)
- >>> print(p.match(s).group())
- abc
- >>> s = "2022 abc123 90years $hello world!"
- >>> p = re.compile(r"^\d+",re.M)
- >>> print(p.match(s).group())
- 2022
- >>> s = "abc123 90years $hello world!"
- >>> p = re.compile(r"^\w+",re.M)
- >>> print(p.match(s).group())
- abc123
- import re
-
- s = """abc123
- 90ww dd123
- hello world!
- Hi girl~
- ###test"""
-
- p = re.compile(r"^\w+$",re.M)
- print(p.findall(s))
- >>> s=['bat', 'bit', 'but', 'hat', 'hit', 'hut']
- >>> for i in s:
- ...     print(re.match(r"^[bh][aiu]t$",i).group())
- ...
- bat
- bit
- but
- hat
- hit
- hut
- >>> word_lst = ["Abc112_","akkk_222A","_12ABc","123abcA",\
- ... "test@","!num","he@#he"]
- >>> for word in word_lst:
- ...     if re.match(r"^[a-zA-Z_]\w*$",word):
- ...         print(word)
- ...
- Abc112_
- akkk_222A
- _12ABc
- import re
-
- # p = re.compile(r"[0-9]{4}-[01][0-9]-[0-3][0-9]\s([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9]") # 月份、日期处理没有细致处理
- p = re.compile(r"^(?:(?!0000)[0-9]{4}-(?:(?:0[1-9]|1[0-2])-(?:0[1-9]|1[0-9]|2[0-8])|(?:0[13-9]|1[0-2])-(?:29|30)|(?:0[13578]|1[02])-31)|(?:[0-9]{2}(?:0[48]|[2468][048]|[13579][26])|(?:0[48]|[2468][048]|[13579][26])00)-02-29)\s([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9]$") # 网上找到 https://blog.csdn.net/lxcnn/article/details/4362500
-
- with open("p11-7.txt") as fp:
-     for line in fp:
-         if p.match(line):
-             print(line.strip())
文档 p11-7.txt 的内容:
- 1970-01-01 00:00:00
- 2099-12-31 23:59:59
- 2022-02-22 12:22:33
- 1234-08-05 24:00:00
- 1234-08-05 11:00:00
- 1991-02-31 11:00:00
- 2022-14-05 11:00:00
- 2022-03-55 11:00:00
- 2022-11-05 26:00:00
- 2022-10-05 12:61:00
- 2022-12-05 12:00:79
- >>> import re
- >>> re.match(r'\\home',"\home")
- <re.Match object; span=(0, 5), match='\\home'>
- >>> re.match(r'\\home',"\home").group()
- '\\home'
- >>> print(re.match(r'\\home',"\home").group())
- \home
- import re
-
- s="I just had to come over and say 'Amazing eyes'. You wearing color contacts?"
- p = re.compile(r"\b[a-z]+\b",re.I)
- print(p.findall(s))
- s = '"http://www.gloryroad.cn'
- p = re.compile(r"((http|ftp|https)://)(([a-zA-Z0-9\._-]+\.[a-zA-Z]{2,6})|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))(:[0-9]{1,4})*(/[a-zA-Z0-9\&%_\./-~-]*)?")
- print(p.search(s).group())
- >>> s = '<a href="http://www.gloryroad.cn">光荣之路官网</a>'
- >>> p = re.compile(r"http[s]?://\w+(\.\w+\.\w+)+")
- >>> print(p.search(s).group())
- http://www.gloryroad.cn
- import re
- word_list = []
- with open("xxx.txt") as file_obj:
-     content = file_obj.read()
- word_list = re.findall(r"\b[a-z]+\b",content,re.I)
- print("word number: ",len(word_list))