如何找出 Python list 中重复的项
先说去除重复项:比较容易记忆的方法是用内置的 set
# Remove duplicates by round-tripping the list through a set.
# Sets are unordered, so the original item order is not kept.
l1 = ['b','c','d','b','c','a','a']
l2 = list(set(l1))
# Single-argument call form runs on both Python 2 and Python 3.
print(l2)
还有一种据说速度更快的,没测试过两者的速度差别
# Remove duplicates by using the items as dictionary keys.
l1 = ['b','c','d','b','c','a','a']
# list(...) yields a real list on Python 3 as well, where keys() would only
# return a view. NOTE: key order is arbitrary on Python 2; since Python 3.7
# dicts keep insertion order, so the first-seen order is preserved there.
l2 = list(dict.fromkeys(l1))
# Single-argument call form runs on both Python 2 and Python 3.
print(l2)
这两种都有个缺点,去除重复元素后顺序变了:
['a', 'c', 'b', 'd']
如果想要保持它们原来的顺序:
用list类的sort方法
# Deduplicate with a set, then restore the original order by sorting every
# item on the index of its first occurrence in l1.
l1 = ['b','c','d','b','c','a','a']
l2 = list(set(l1))
# l1.index scans l1 for each item, so this is quadratic on long lists.
l2.sort(key=l1.index)
# Single-argument call form runs on both Python 2 and Python 3.
print(l2)
也可以这样写
# Same idea in one expression: sorted() builds the new list directly from
# the set, ordered by each item's first position in l1.
l1 = ['b','c','d','b','c','a','a']
l2 = sorted(set(l1), key=l1.index)
# Single-argument call form runs on both Python 2 and Python 3.
print(l2)
也可以用遍历
# Order-preserving deduplication with an explicit loop: keep an item only
# the first time it is seen.
l1 = ['b','c','d','b','c','a','a']
l2 = []
for i in l1:
    if i not in l2:
        l2.append(i)
# Single-argument call form runs on both Python 2 and Python 3.
print(l2)
上面的代码也可以这样写
# Compact spelling of the loop above it in the article.
l1 = ['b','c','d','b','c','a','a']
l2 = []
# The comprehension is evaluated only for its side effect on l2; the list of
# None values it builds is thrown away. The explicit loop is usually clearer.
[l2.append(i) for i in l1 if i not in l2]
# Single-argument call form runs on both Python 2 and Python 3.
print(l2)
这样就可以保证顺序不变了:
['b', 'c', 'd', 'a']
#!/usr/bin/env python
# coding: utf-8

import os
import sys
import string
import operator
import re
import threading
import csv

from time import sleep,ctime
from collections import defaultdict
from collections import Counter


def test_01(filename="./aa"):
    """Report which ``pos`` values occur more than once in a log file.

    Each relevant line of the file looks like::

        ###pos=350143600,pts=2676718###

    Lines that do not start with ``###pos=`` are ignored. For every ``pos``
    value seen on more than one record, a ``(pos, [record indexes])`` tuple
    is printed, followed by two summary lines: the number of parsed
    pos/pts values, then the number of distinct and of duplicated ``pos``
    values.

    Args:
        filename: Path of the log file to scan; defaults to ``./aa``.

    Raises:
        ValueError: If a ``###pos=`` line has no ``,pts=`` separator.
    """
    pos_prefix = "###pos="
    pts_separator = ",pts="
    record_suffix = "###"

    poslist = []
    dtslist = []

    # The context manager closes the file even if parsing raises.
    with open(filename) as f:
        for line in f:
            if not line.startswith(pos_prefix):
                continue
            # Strip the line ending first so that a final record without a
            # trailing newline does not lose its last digit.
            record = line.rstrip()
            if record.endswith(record_suffix):
                record = record[:-len(record_suffix)]
            pos, sep, dts = record[len(pos_prefix):].partition(pts_separator)
            if not sep:
                raise ValueError("missing %r in line: %r" % (pts_separator, line))
            poslist.append(pos)
            dtslist.append(dts)

    # Map every pos value to the indexes of the records that carry it.
    d = defaultdict(list)
    for index, pos in enumerate(poslist):
        d[pos].append(index)

    count = 0
    for value in d.items():
        if len(value[1]) > 1:
            print(value)
            count = count + 1
    print("poslen:"+str(len(poslist))+",dtslen"+str(len(dtslist)))
    print(str(len(d))+","+str(count))


if __name__ == "__main__":
    test_01()
    print("finish")
aa 文件中的内容如下(每行一条记录):
###pos=1349796,pts=15015###
###pos=2337820,pts=27986###
###pos=2705098,pts=29988###
###pos=6660200,pts=54721###
###pos=8055314,pts=61061###
###pos=8871800,pts=65315###
###pos=9503420,pts=68401###
###pos=12855218,pts=88338###
###pos=14253082,pts=98765###
###pos=15813764,pts=109192###
###pos=15813764,pts=109192###
###pos=15813764,pts=109192###
###pos=15813764,pts=109192###
###pos=16056146,pts=110735###
###pos=16394580,pts=113988###
###pos=17011532,pts=119911###
###pos=17257542,pts=122372###
###pos=17417974,pts=124040###
###pos=17816976,pts=128169###
###pos=17993398,pts=129838###
###pos=18302190,pts=132215###
###pos=19166088,pts=139055###
###pos=19675276,pts=143059###
###pos=19994992,pts=146146###