表达式模板及普通函数调用方式的效率差异为何是这种情况?
最近非常关心不同函数实现方式的效率差异。所以做了下面这组比较。比较的内容是三个Vector的连加,即
Y=M1+M2+M3
本次比较采用的函数的实现方式包含如下5种:
T1-表达式模板方式实现的运算符重载,请注意这不是通常的运算符重载
T2-两两加法,按地址方式传递参数,以地址方式返回结果
vector& vector::add2(const vector& m1, const vector& m2);
T3-两两加法,按地址方式传递参数,以对象方式返回结果。这种方式相当于常用的运算符重载
friend vector add(const vector& m1, const vector& m2);
T4-直接写函数实现连加,以对象方式返回结果
friend vector addall(const vector&m1, const vector&m2, const vector &m3);
T5-直接写函数实现连加,以地址方式返回结果
vector& vector::addall2(const vector&m1, const vector&m2, const vector &m3);
大家是否能猜一下哪种方法最快?哪种方法最慢?
根据网上查到的资料,表达式模板实现的运算符重载速度应该非常快,我原本觉得T1的速度应该接近T4,结果却让自己大失所望:它的耗时是几种方法中最多的,实在想不到它居然比常用的运算符重载的效率还低。这究竟是什么原因造成的呢?恳请各位高人指点迷津!
除了T1之外,其他方法的效率排序是 T5 > T4 > T2 > T3,该结果与起初的估计一致。由于实际做计算库的时候,采用T4和T5两种方式是不现实的,从测试结果看,T2可能是最可行的方式,这也是自己写运算库时所使用的方式。
需要说明的是,由于自己刚开始接触表达式模板,上述结果也有可能是程序设计方面的失误导致的。因此,给出了自己的测试程序。这只是一段非常粗陋的代码,计算中甚至没有Vector的元素个数判断,但用于测试应该没有什么问题。
耗时结果如下:
T1:15
T2:9
T3:12
T4:8
T5:5
Press any key to continue . . .
源程序:
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <time.h>
// Expression-template node: a lazy, element-wise sum of two operands.
//
// No arithmetic happens at construction time; each element is computed on
// demand by operator[]. The operands are held by const reference and are not
// owned, so a MatrixSum must not outlive the objects it was built from
// (including temporary MatrixSum nodes created inside a larger expression).
//
// T1 must provide get_dim(); T1 and T2 must both provide operator[] whose
// results can be added and converted to double.
template <class T1, class T2>
class MatrixSum {
private:
    const T1 &u;  // left operand (not owned)
    const T2 &v;  // right operand (not owned)
public:
    MatrixSum(const T1 &u1, const T2 &v1) : u(u1), v(v1) {}

    // Element count, taken from the left operand.
    // Exposed under the same name the operands use, so that a MatrixSum can
    // itself be the left operand of another MatrixSum (e.g. (a + b) + c);
    // without it ndim() on a nested sum does not compile.
    size_t get_dim() const { return u.get_dim(); }

    // Element count as an int (kept for existing callers).
    int ndim() const { return static_cast<int>(get_dim()); }

    // Returns element i of the sum, computed from the operands on each call.
    double operator [] (int i) const { return u[i] + v[i]; }
};
// Builds the lazy sum node for the expression `m1 + m2`.
// Nothing is added here; the returned MatrixSum only refers to both operands
// and evaluates elements when indexed.
// Note: the template is unconstrained, so it is a candidate for `+` on any
// pair of operand types.
template <class T1, class T2>
MatrixSum<T1, T2> operator + (const T1& m1, const T2 &m2) {
    MatrixSum<T1, T2> node(m1, m2);
    return node;
}
// Fixed-size array of doubles backed by a heap buffer that the object owns.
// Copying is deep; assignment never resizes the destination.
class vector
{
protected:
    size_t dim;   // number of elements
    double *ets;  // owned buffer holding `dim` doubles
public:
    // Creates a vector of n elements copied from the array pd.
    // pd must point to at least n readable doubles.
    vector(size_t n, const double* const pd) : dim(n), ets(new double[n])
    {
        // Skip the call for empty vectors so memcpy is never handed a
        // possibly-null source pointer.
        if (dim != 0) {
            memcpy(ets, pd, dim*sizeof(double));
        }
    }

    // Creates a vector of n elements, each set to d.
    // Also serves as the default constructor (empty vector).
    vector(size_t n = 0, double d = 0.0) : dim(n), ets(new double[n])
    {
        for (size_t i = 0; i < dim; ++i) {
            ets[i] = d;
        }
    }

    // Deep copy: allocates a fresh buffer and copies v's elements into it.
    vector(const vector& v) : dim(v.dim), ets(new double[v.dim])
    {
        memcpy(ets, v.ets, sizeof(double)*dim);
    }

    // Copies v's elements into this vector.
    // The sizes must already match; on a mismatch the program is terminated
    // with exit(1).
    vector& operator=(const vector& v)
    {
        if (this != &v) {  // guard against self-assignment
            if (dim != v.dim) {
                exit(1);
            }
            memcpy(ets, v.ets, sizeof(double)*dim);
        }
        return *this;
    }

    // Evaluates a lazy sum expression element by element into this vector.
    // Exactly `dim` elements are read from RES; the expression's own size is
    // not checked.
    template <class T1, class T2>
    vector& operator=(const MatrixSum <T1, T2> &RES)
    {
        for (size_t i = 0; i < dim; ++i) {
            ets[i] = RES[i];
        }
        // Previously missing: falling off the end of a function that returns
        // a reference is undefined behaviour.
        return *this;
    }

    // Releases the owned buffer.
    ~vector()
    {
        delete[] ets;
    }

public:
    // Unchecked element access (read-only and read-write).
    inline double operator[](size_t i) const { return ets[i]; }
    inline double& operator[](size_t i) { return ets[i]; }

    // Number of elements.
    size_t get_dim() const { return dim; }

    // Raw pointer to the element buffer.
    double* get_ptr() const { return ets; }

    // Stores m1 + m2 element-wise into this vector and returns *this.
    // m1 and m2 must each hold at least `dim` elements (not checked).
    vector& add2(const vector& m1, const vector& m2) {
        // size_t index: an `unsigned` counter would wrap before reaching a
        // dim larger than UINT_MAX.
        for (size_t i = 0; i < dim; ++i) {
            ets[i] = m1.ets[i] + m2.ets[i];
        }
        return *this;
    }

    // Stores m1 + m2 + m3 element-wise into this vector and returns *this.
    // All three operands must hold at least `dim` elements (not checked).
    vector& addall2(const vector&m1, const vector&m2, const vector &m3) {
        for (size_t i = 0; i < dim; ++i) {
            ets[i] = m1.ets[i] + m2.ets[i] + m3.ets[i];
        }
        return *this;
    }

    // Free-function variants that return the result by value.
    friend vector add(const vector& m1, const vector& m2);
    friend vector addall(const vector&m1, const vector&m2, const vector &m3);
};
// Returns a new vector holding the element-wise sum m1 + m2.
// The result has m1.get_dim() elements; m2 must hold at least as many
// (not checked).
vector add(const vector& m1, const vector& m2) {
    // The 1.0 fill value is a placeholder; every element is overwritten below.
    vector res(m1.get_dim(), 1.0);
    // size_t index: an `unsigned` counter would wrap before reaching a dim
    // larger than UINT_MAX and the loop would never finish.
    for (size_t i = 0; i < res.dim; ++i) {
        res.ets[i] = m1.ets[i] + m2.ets[i];
    }
    return res;
}
// Returns a new vector holding the element-wise sum m1 + m2 + m3, computed in
// a single pass.
// The result has m1.get_dim() elements; m2 and m3 must hold at least as many
// (not checked).
vector addall(const vector&m1, const vector&m2, const vector &m3) {
    // The 1.0 fill value is a placeholder; every element is overwritten below.
    vector res(m1.get_dim(), 1.0);
    // size_t index: an `unsigned` counter would wrap before reaching a dim
    // larger than UINT_MAX and the loop would never finish.
    for (size_t i = 0; i < res.dim; ++i) {
        res.ets[i] = m1.ets[i] + m2.ets[i] + m3.ets[i];
    }
    return res;
}
// Writes M to standard output: the element count in square brackets on one
// line, then every element followed by a single space on the next line.
void print (const vector& M) {
    const size_t count = M.get_dim();
    std::cout << "[" << count << "]" << std::endl;

    unsigned idx = 0;
    while (idx < count) {
        std::cout << M[idx] << " ";
        ++idx;
    }
    std::cout << std::endl;
}
using namespace std;
int main(int argc, char *argv[])
{
unsigned dim=50000, niter=9000, k;
vector m1(dim, 0.1), m2(dim, 0.2), m3(dim, 3.2),
m4(dim, 0.2), m5(dim, 0.8), m6(dim, 0.3), m7(dim, 2.2),
t1(dim, 0.1), t2(dim, 0.1), t3(dim, 0.1);
time_t t, t0; time(&t0);
for (k=0; k<niter; k++) m4=m1+m2+m3;
time(&t); std::cout<<"T1:"<<t-t0<<std::endl; t0=t;
for (k=0; k<niter; k++) m5=t2.add2(t1.add2(m1, m2), m3);
time(&t); std::cout<<"T2:"<<t-t0<<std::endl; t0=t;
for (k=0; k<niter; k++) m6=add(add(m1, m2), m3);
time(&t); std::cout<<"T3:"<<t-t0<<std::endl; t0=t;
for (k=0; k<niter; k++) m7=addall(m1, m2, m3);
time(&t); std::cout<<"T4:"<<t-t0<<std::endl; t0=t;
for (k=0; k<niter; k++) m7=t1.addall2(m1, m2, m3);
time(&t); std::cout<<"T5:"<<t-t0<<std::endl; t0=t;
system("PAUSE");
return EXIT_SUCCESS;
}