• 使用 Python re 正则表达式从文本中提取全球电话号码和邮箱地址


    1. 提取电话

    import re

    # Sample input: phone numbers in many formats, mixed with unrelated text.
    text = '''
    "(+91)966-352-2720", "☎ +91 9663522720"
     9663522720
    +79082343434   8(912)2342554,  +7 982 342 sdfdsf 34 34 fsf 8-923-132-34-23 +7 982 342 34 34! sdfsd
    13045542316
    01727 830357
    Tlf. 788 96 888
    8553664787;3
    '(866) 997-3523(615) 488-4607'
    +TollFree:1-888-412-3160
    914-345-0800 1
    '+?911235410HorariodeLV:09:0018:00'
    "Telephone:4697021130"
    CALL US: +91- 421- 435 00 02, 224 84 72
    "+57x018000423 670"
    "416-998-8438 | 416-630-3053"
    "Whatsapp + 44 75 88755 173"
    (852)95788980 | Tel: (852) 23428887"
    phone: +974 4411-0147  |  : +974 5512-3374
    fax: 517/253-7366
    ¡llámanos! (55) 71 00 35 00
    +dubai:043804009|abudhabi:024146688
    monfri:9:00am12:30pm/2:00pm5:30pm(cet)33983400075
    https:wa-me/15551234567
    216-812-1615 ext 115
    wsp +56 9 5713 2204
    call us +91 96888 78333
    +contact: +91 70953 23456
    15551234569/15551234567
    port perry - 289-485-brew (2739) newmarket - 289-803-9111
    (518) 858u200b-u200b9589
    +971 58505 8980
    waes footwear, u2028suite 2, duke st chambers, bridge street, tq7 1hx, united kingdom (+44) 020 3287 7957
    contact-alexy-metals-216-410-8661
    toll free +1 (888) 815-6150
    call us: +91- 421- 435 00 02, 224 84 72
    +61 405 385 704
    (0044) (0)1782 611 599
    0123-456-7890
    55 6930 9729
    +1 (800) 123 456 789
    '''

    # Variant 0: four alternative patterns joined with "|".
    # The pattern is a raw string so that "\(", "\d", ... reach the regex engine
    # unchanged without triggering invalid-escape-sequence warnings.
    numbers0 = re.findall(
        r"[\(\+\)]*[\+\d\)]*[\( -]?\d{3}[\) -]?\d{3}[ -]?\d{2}[ -]?\d{2}|[\(]?[0-9]{3}[\)]?[- \.]?[0-9]{3}[- \.]?[0-9]{4,6}|[\(\+\)/\d+]* *[\-\d]{8,}|[\+\d ]*[\d ]{5,} ?[\d]{5,}",
        text,
    )
    # A match that starts with ")" or "/" loses that first character;
    # every other match only has its surrounding whitespace stripped.
    numbers0 = [t[1:] if t.startswith((')', '/')) else t.strip() for t in numbers0]
    print('numbers0:',numbers0)

    # Variant 1: optional "(", "+", ")" prefix and leading non-zero digits, then
    # at least eight digit/dot/dash/parenthesis characters and a final digit.
    # (re.S was dropped: the pattern has no "." wildcard, so the flag had no effect.)
    numbers1 = re.findall(r"[\(\+\)]*[1-9]* ?[0-9.\-\(\)]{8,} ?[0-9]", text)
    print('numbers1:',numbers1)

    # Variant 2: optional prefix followed by a 3 + 3 + (4 to 6) digit layout,
    # with at most one "-", " " or "." between the groups.
    numbers2 = re.findall(r"[\(\+\d\)]* ?[\(]?[0-9]{3}[\)]?[- \.]?[0-9]{3}[- \.]?[0-9]{4,6}", text)
    print('numbers2:',numbers2)

    # Variant 3: loose match on runs of digits, spaces, dashes, "+" and parentheses.
    numbers3 = re.findall(r"[\+\d\(\) ]+[\d -]{2,} ?[\d-]{2,}", text)
    # Keep only candidates containing more than seven digits. If the stripped
    # candidate does not begin with "+", "(" or a digit, the first character of
    # the raw match is dropped before stripping.
    numbers3 = [
        t[1:].strip()
        if t.strip()[0] not in ['+', '('] and not t.strip()[0].isdigit()
        else t.strip()
        for t in numbers3
        if len(re.sub(r'\D', '', t)) > 7
    ]

    print('numbers3:',numbers3)

    2. 提取邮箱

    import re

    # Sample input: well-formed addresses mixed with incomplete ones
    # ("hello@", "@test", "email@gmail") and one with two "@" signs.
    emails="""
    geon@ihateregex.io
    test@gmail.com mail@test.org
    mail@testing.com
    hello@
    @test
    email@gmail
    theproblem@test@gmail.com
    """
    # Local part, "@", domain, then a dot and a 2-15 letter top-level label.
    # For "theproblem@test@gmail.com" the pattern extracts the trailing
    # "test@gmail.com" rather than rejecting the whole token.
    # (re.S was dropped: the pattern has no "." wildcard, so the flag had no effect.)
    emails_0 = re.findall(r'[a-zA-Z]?[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,15}', emails)
    print('emails_0:',emails_0)
  • 相关阅读:
    VMware Workstation Pro下载安装
    [开学季]ChatPaper全流程教程
    vue播放报警音实现过程
    Spring JDK动态代理(附带实例)
    做了这些年开发,今天第一次梳理了这三种常用的变量
    Linux —用户和组
    HyBird App(混合应用)核心原理JSBridge
    【技术实操】银河高级服务器操作系统实例分享,达梦数据库服务器 oom 问题分析
    每周一算法:双向深搜
    魔法诗~~~一套基于Vue开发的实用、高端、炫酷的响应式前端网页!!!
  • 原文地址:https://blog.csdn.net/lwdfzr/article/details/126466412