python – 使用itertools.combinations的最快方法
我需要加快下面这个函数的执行速度:
import numpy as np
import itertools
import timeit
def combcol(myarr):
    """Find every 6-row combination whose picked values zigzag high/low.

    Row indices are drawn in increasing order via ``itertools.combinations``.
    For a combination ``(i1, ..., i6)`` the candidate values are read
    alternately from column 1 and column 2::

        c1 = myarr[i1, 1]   c2 = myarr[i2, 2]   c3 = myarr[i3, 1]
        c4 = myarr[i4, 2]   c5 = myarr[i5, 1]   c6 = myarr[i6, 2]

    The combination is kept when ``c1 > c2 < c3 > c4 < c5 > c6``.

    Args:
        myarr: 2-D NumPy array with at least three columns.

    Returns:
        A list of ``(indices, values)`` pairs, where ``indices`` is the
        6-tuple of row indices and ``values`` the 6-tuple ``(c1, ..., c6)``,
        in the order ``itertools.combinations`` yields them. The list is
        empty when the array has fewer than six rows.
    """
    ndims = myarr.shape[0]
    # Slice the two columns once so the loop does 1-D lookups instead of
    # repeating a 2-D index for every candidate combination.
    odd_col = myarr[:, 1]
    even_col = myarr[:, 2]
    solutions = []
    for indices in itertools.combinations(np.arange(ndims), 6):
        idx1, idx2, idx3, idx4, idx5, idx6 = indices
        values = (
            odd_col[idx1], even_col[idx2], odd_col[idx3],
            even_col[idx4], odd_col[idx5], even_col[idx6],
        )
        c1, c2, c3, c4, c5, c6 = values
        # Compare the values directly instead of testing the sign of their
        # difference: subtracting unsigned integers wraps around, so a
        # "difference < 0" test can never succeed for such arrays.
        if c1 > c2 < c3 > c4 < c5 > c6:
            solutions.append((indices, values))
    return solutions
# Random benchmark inputs: 20 and 40 rows, 10 columns each.
X = np.random.random((20, 10))
Y = np.random.random((40, 10))

if __name__ == '__main__':
    from timeit import Timer

    # Time one call of combcol per input size and report both durations.
    small_timer = Timer(lambda: combcol(X))
    large_timer = Timer(lambda: combcol(Y))
    small_elapsed = small_timer.timeit(number=1)
    large_elapsed = large_timer.timeit(number=1)
    print('t : ', small_elapsed, 't1 : ', large_elapsed)
结果:
t : 0.6165180211451455 t1 : 64.49216925614847
对于我的典型使用场景(myarr.shape[0] = 500)来说,该算法太慢了。是否有 NumPy 方法可以减少此函数的执行时间(且不占用过多内存)?是否可以用 Cython 来解决这个问题?
我已经尝试使用cProfile查看哪些部分很慢.这里的大部分时间都花在调用combcol()上.
import profile
........
........
profile.run('print(len(combcol(Y))); print')
144547
144559 function calls in 39.672 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
144547 0.641 0.000 0.641 0.000 :0(append)
1 0.000 0.000 0.000 0.000 :0(arange)
2 0.000 0.000 0.000 0.000 :0(charmap_encode)
1 0.000 0.000 39.672 39.672 :0(exec)
1 0.000 0.000 0.000 0.000 :0(len)
1 0.000 0.000 0.000 0.000 :0(print)
1 0.000 0.000 0.000 0.000 :0(setprofile)
1 0.094 0.094 39.672 39.672 <string>:1(<module>)
2 0.000 0.000 0.000 0.000 cp850.py:18(encode)
1 38.938 38.938 39.578 39.578 essaiNumpy4.py:13(combcol)
1 0.000 0.000 39.672 39.672 profile:0(print(len(combcol(Y))); print)
0 0.000 0.000 profile:0(profiler)
最后我把代码修改成了这样:
def combcol2(myarr):
    """Return the zigzag 6-row combinations of ``myarr`` using plain lists.

    For each increasing 6-tuple of row indices, values are taken alternately
    from column 1 and column 2 (column 1 for the 1st, 3rd and 5th index,
    column 2 for the 2nd, 4th and 6th). A tuple is kept when its values
    satisfy ``v1 > v2 < v3 > v4 < v5 > v6``.

    Args:
        myarr: 2-D NumPy array with at least three columns.

    Returns:
        A list of ``(indices, values)`` pairs in the order produced by
        ``itertools.combinations``; empty when there are fewer than six rows.
    """
    row_count = myarr.shape[0]
    # Convert both columns to Python lists once, so the loop below indexes
    # lists rather than the array.
    col_one = myarr[:, 1].tolist()
    col_two = myarr[:, 2].tolist()

    matches = []
    for picked in itertools.combinations(range(row_count), 6):
        a, b, c, d, e, f = picked
        values = (
            col_one[a], col_two[b], col_one[c],
            col_two[d], col_one[e], col_two[f],
        )
        v1, v2, v3, v4, v5, v6 = values
        if not (v1 > v2 < v3 > v4 < v5 > v6):
            continue
        matches.append((picked, values))
    return matches
# Random benchmark input: 40 rows, 10 columns.
X = np.random.random((40, 10))

if __name__ == '__main__':
    from timeit import Timer

    # Time a single call of combcol2 on the 40-row input.
    elapsed = Timer(lambda: combcol2(X)).timeit(number=1)
    print('t : ', elapsed)
结果:
t : 4.341582240200919