>>> import re
>>> result = re.search("sanchuang","hello world,this is sanchuang")
>>> result
<_sre.SRE_Match object; span=(20, 29), match='sanchuang'>
>>> result = re.search("sanchuang123","hello world,this is sanchuang")
>>> result
>>> print(result)
None
groups返回一个包含所有小组字符串的元组,从 1 到 所含的小组号。
start方法提供了原始字符串中匹配开始的索引
end方法提供了原始字符串中匹配结束的索引
match只能从字符串开头查找,开始的部分没有,那就匹配不上
>>> result = re.match("san.*$","hello world,this is sanchuang")
>>> result
>>> result = re.match("hello","hello world,this is sanchuang")
>>> result
<_sre.SRE_Match object; span=(0, 5), match='hello'>
result.start()
0 #开始的位置
result.end()
5 #结束的位置
result.group()
hello #匹配的结果
如果匹配到了,就会返回match对象,没有匹配返回None
>>> result = re.match("sanchuang","hello world,this is sanchuang")
>>> print(result)
None
>>> result = re.match("hello","hello")
>>> result.group()
hello
>>> msg = "i love pythonpython1 python2"
>>> re.findall("python",msg)
['python', 'python', 'python']
>>> re.finditer("python",msg)
<callable_iterator object at 0x7fc3e6126470>
>>> for i in re.finditer("python",msg):
... print(i,i.group())
...
<_sre.SRE_Match object; span=(7, 13), match='python'> python
<_sre.SRE_Match object; span=(13, 19), match='python'> python
<_sre.SRE_Match object; span=(21, 27), match='python'> python
将string中匹配的内容替换为新内容
>>> msg = "i love python1 pythonyy python123 pythontt"
>>> re.sub("python[0-9]","**",msg)
'i love ** pythonyy **23 pythontt'
>>> msg = "i love python1 pythonyy python123 pythontt"
>>> re.sub(r"python\b","**",msg) #\b要求python后面是单词边界,这里python后面都紧跟字母或数字,所以没有任何替换
'i love python1 pythonyy python123 pythontt'
>>> msg = "i love python1 pythonyy python123 pythontt"
>>> re.sub(r"python\d","**",msg)
'i love ** pythonyy **23 pythontt'
正则表达式重用,先把正则表达式编译成一个对象
>>> reg = re.compile(r"python\d")
>>> reg.findall(msg)
['python1', 'python1']
最简单的正则表达式是那些仅包含简单的字母数字字符的表达式,复杂的正则可以实现强大的匹配
正则匹配区分大小写
cat|dog 匹配cat或dog
a[^a-z] 匹配a+非小写字母
. 匹配任何(除\n外)的单个字符,它仅仅只能出现在方括号字符组以外(在方括号内 . 只表示普通的点号)
#区间匹配
import re
ret = re.findall("[Pp]y","Python3 pth pp ppython") #[]中任选一个字符匹配
print(ret)
#按编码排序
ret = re.findall("[A-z]y","python3 pth paaapaqzython")
print('aaaa',ret)
ret = re.findall("[^A-z]y","Python3 1ypth ppython")
print(ret)
#或匹配
msg = "xlo soaodjas xsak dsa xx"
print(re.findall("so|lo",msg))
# . 占位符 表示除了换行符以外的任意一个字符
ret = re.findall("p.thon","Python python pgthon pthon p thon p\nthon")
print(ret)
输出:
['Py', 'py']
aaaa ['py', 'zy']
['1y']
['lo', 'so']
['python', 'pgthon', 'p thon']
在正则表达式字符串前面加r(原始字符串),反斜杠不会先被Python当作转义字符处理,而是原样交给正则表达式引擎去匹配
如果正则表达式有反斜杠,建议前面加r
import re
ret = re.findall(r'\bword',"啊word abcword 1word #word word123 $word")
ret1 = re.findall(r'\wword',"啊word abcword 1word #word word123 $word")
print(ret,ret1)
输出:
['word', 'word', 'word'] ['啊word', 'cword', '1word']
msg = """Python
python
python123
python345
"""
ret = re.findall("^[Pp]ython",msg)
print(ret)
# ? 匹配前一项0-1次
# + 匹配前一项1-n次
# * 匹配前一项0-n次,任意次
print("---------通配符---------")
ret = re.findall("py?","py p python pyython")
print(ret)
ret = re.findall("py+","py p python pyython")
print(ret)
ret = re.findall("py*","py p python pyython")
print(ret)
#{n,m} 匹配前一项n到m次; {n,} 匹配至少n次; {,m} 匹配至多m次
ret = re.findall("py{2,4}","pyyy py pyyyyypyypy")
print(ret)
ret = re.findall("Python",msg,re.M|re.I)
print(ret)
ret = re.findall("[Pp]ython",msg,re.M)
print(ret)
ret = re.findall("Python.*",msg,re.I)
print(ret)
ret = re.findall("Python.*",msg,re.I|re.S)
print(ret)
#默认贪婪模式 -- 尽可能匹配长的
msg = "testbbtedst2"
ret = re.findall(".*",msg)
print(ret)
ret = re.findall(".*?",msg)
print(ret)
当使用分组时,除了可以获得整个匹配,还能够单独获得每一个分组匹配到的内容,使用 () 进行分组
ret = re.search(r"(\d{3})-(\d{3})-(\d{3})","abc123-456-789aaa")
print(ret.group())
print(ret.group(1))
print(ret.group(2))
print(ret.group(3))
#捕获分组(正则表达式)(以上)
# 分组匹配上之后,会把匹配上的数据放到内存中,并给定一个从1开始的索引
#非捕获分组(?:正则表达式)
# 只分组不捕获
ret = re.search(r"(\d{3})-(?:\d{3})-(\d{3})","abc123-456-789aaa")
print(ret.group())
print(ret.group(1))
print(ret.group(2))
使用()分组后,在正则表达式中用 \1, \2 反向引用对应分组已经匹配到的内容(Python的re中分组编号从1开始,\0不是反向引用;整个匹配用group(0)获取,在re.sub的替换串中用\g<0>表示)
#使用findall 有捕获分组的话,只会显示捕获分组的内容
msg = "aa bb aa bb"
print(re.search(r"(\w+)\s(\w+)\s\1\s\2",msg))
print(re.findall(r"(\w+)\s(\w+)\s\1\s\2",msg))
msg = "aa bb aa cc"
print(re.search(r"(\w+)\s(\w+)\s\1\s\2",msg))
msg = "comyy@123.comcom123@qq.comaaa@126.combbb@163.comcc@abc.com"
#把里面123 126 163 邮箱捞出来
print(re.findall(r"(?:\.com)?(\w+@(?:123|126|163)\.com)",msg))
正则表达式的断言分为:先行断言(lookahead)和后行断言(lookbehind)
#确定位置,不占用匹配宽度
s = "sc1 hello sc2 hello"
#匹配后面是 sc2的hello
print(re.findall(r"\w+ hello(?= sc)",s))
#匹配后面不是 sc2的hello
print(re.findall(r"\w+ hello(?! sc)",s))
#匹配前面是sc2 的hello
print(re.findall(r"(?<=sc2 )hello",s))
#匹配前面不是sc2 的hello
print(re.findall(r"(?<!sc2 )hello",s))
msg = """
1: lo: mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: ens33: mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
link/ether 00:0c:29:9e:70:2c brd ff:ff:ff:ff:ff:ff
inet 192.168.174.130/24 brd 192.168.174.255 scope global noprefixroute ens33
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:fe9e:702c/64 scope link
valid_lft forever preferred_lft forever
"""
print(re.findall(r"(?<=inet ).*(?=/24)",msg))
输出:['192.168.174.130']