>>> import re >>> s = 'abc' >>> s = r'abc' >>> re.findall(s,'abcdfdsajk') ['abc']
例如,[akm$]将匹配字符"a", "k", "m", 或 "$" 中的任意一个
通过元字符“[string]”匹配
>>> st = 'top tip tap tsp tep' >>> res = r'top' >>> re.findall(res,st) ['top'] >>> res = r't[io]p' >>> re.findall(res,st) ['top', 'tip']
[^string]匹配不在方括号内的任意单个字符,例如 t[^io]p 匹配中间字符不是“i”或“o”的字符串
>>> res = r't[^io]p' >>> re.findall(res,st) ['tap', 'tsp', 'tep']
>>> s = "hello world,hello boy" >>> r = r"hello" >>> re.findall(r,s) ['hello', 'hello'] >>> r = r"^hello" >>> re.findall(r,s) ['hello'] >>> r = r"boy$" >>> re.findall(r,s) ['boy']
>>> import re >>> r1 = r"\d{3,4}-?\d{8}" >>> re.findall(r1,'020-88776655') ['020-88776655']
>>> email = r'\w{3}@\w+(\.com|\.net)' >>> re.match(email,'abc@qq.com') <_sre.SRE_Match object at 0x7f81fea30828> >>> re.match(email,'bbb@163.net') <_sre.SRE_Match object at 0x7f81fea470a8> >>> re.match(email,'ccc@redhat.org') >>>
>>> import re >>> r1 = r"\d{3,4}-?\d{8}" >>> p_tel = re.compile(r1) >>> p_tel <_sre.SRE_Pattern object at 0x7f81fead6ab0> >>> p_tel.findall('020-88776655') ['020-88776655']
>>> string_re.match('pmghong hello') <_sre.SRE_Match object at 0x7f81fea28578> >>> string_re.match('hello pmghong ') >>> >>> string_re.search('pmghong hello') <_sre.SRE_Match object at 0x7f81fea285e0> >>> string_re.search('hello pmghong') <_sre.SRE_Match object at 0x7f81fea28578>
>>> string_re.match('pmghong hello') <_sre.SRE_Match object at 0x7f81fea28648> >>> x = string_re.match('pmghong hello') >>> if x: ... print 'OK' ... OK >>> string_re.match('hello pmghong') >>> x = string_re.match('hello pmghong') >>> if x: ... print 'OK' ... else: ... print 'Not OK' ... Not OK
>>> s = "hello python" >>> r1 = r'hello' >>> re.match(r1,s) <_sre.SRE_Match object at 0x7f81fea285e0> >>> >>> x = re.match(r1,s) >>> x.group() 'hello' >>> x.start() 0 >>> x.end() 5 >>> x.span() (0, 5)
>>> s = "hello world" >>> s.replace('world','boy') 'hello boy' >>> s.replace('w...d','boy') 'hello world' >>> >>> rs = r'w...d' >>> re.sub(rs,'boy','world would woked hello') 'boy boy boy hello'
>>> re.subn(rs,'boy','world would woked hello') ('boy boy boy hello', 3)
>>> ip = '192.168.10.1' >>> ip.split('.') ['192', '168', '10', '1'] >>> s = '111+222-333*444/555' >>> re.split(r'[\+\-\*\/]',s) ['111', '222', '333', '444', '555']
>>> p = re.compile('ab*',re.IGNORECASE)
IGNORECASE,I 忽略字符串的大小写
>>> string_re = re.compile(r'pmghong',re.I) >>> string_re.findall('PMGHONG') ['PMGHONG'] >>> string_re.findall('pmghong') ['pmghong'] >>> string_re.findall('Pmghong') ['Pmghong']
DOTALL,S 使“.”匹配包括换行在内的所有字符
>>> r1 = r"baidu.com" >>> re.findall(r1,'baidu.com') ['baidu.com'] >>> re.findall(r1,'baidu_com') ['baidu_com'] >>> re.findall(r1,'baidu com') ['baidu com'] >>> re.findall(r1,'baidu\ncom') [] >>> re.findall(r1,'baidu\ncom',re.S) ['baidu\ncom'] >>> re.findall(r1,'baidu\tcom',re.S) ['baidu\tcom']
>>> s = ''' ... hello boy ... boys and girls ... hello girl ... what a nice day ... ''' >>> r1 = r'^hello' >>> re.findall(r1,s) []
原因是三引号(多行)字符串是这样存放数据的(开头是一个换行符,所以默认模式下“^hello”无法在字符串起始处匹配):
>>> s '\nhello boy\nboys and girls\nhello girl\nwhat a nice day\n'
所以需要加入M属性,进行多行匹配
>>> re.findall(r1,s,re.M) ['hello', 'hello']
>>> tel = r''' ... \d{3,4} ... -? ... \d{8} ... ''' >>> re.findall(tel,'020-88776655') [] >>> tel '\n\\d{3,4}\n-?\n\\d{8}\n'
解决办法就是加入re.X 属性
>>> re.findall(tel,'020-88776655',re.X) ['020-88776655']
附上网上搜到的一张表