再问一个代码优化的问题

领域专家: 人工智能技术领域

2016-01-22 03:42:44





int qx_tree_filter::filter(double*cost,double*cost_backup,int nr_plane)

{

	memcpy(cost_backup,cost,sizeof(double)*m_h*m_w*nr_plane);

	int*node_id=m_node_id;

	int*node_idt=&(node_id[m_nr_pixel-1]);

	for(int i=0;i<m_nr_pixel;i++)//for each pixel in image

	{

		int id=*node_idt--;

		int id_=id*nr_plane;

		int nr_child=m_mst_nr_child[id];

		if(nr_child>0)

		{

			double*value_sum=&(cost_backup[id_]);

			for(int j=0;j<nr_child;j++)

			{

				int id_child=m_mst_children[id][j];

				int id_child_=id_child*nr_plane;

				double weight=m_table[m_mst_weight[id_child]];

				//value_sum+=m_mst_value_sum_aggregated_from_child_to_parent[id_child]*weight;

				double*value_child=&(cost_backup[id_child_]);

				for(int k=0;k<nr_plane;k++)

				{

					value_sum[k]+=(*value_child++)*weight;

				}

			}

		}

		//else

		//{

		//	memcpy(&(cost_backup[id_]),&(cost[id_]),sizeof(double)*nr_plane);

		//}

		//printf("[id-value-weight]: [%d - %3.3f - %3.3f]\n",id,m_mst_[id].value_sum_aggregated_from_child_to_parent,m_mst_[id].weight_sum_aggregated_from_child_to_parent);

	}

	int*node_id0=node_id;

	int tree_parent=*node_id0++;

	int tree_parent_=tree_parent*nr_plane;

	memcpy(&(cost[tree_parent_]),&(cost_backup[tree_parent_]),sizeof(double)*nr_plane);

	for(int i=1;i<m_nr_pixel;i++)//K_00=f(0,00)[K_0-f(0,00)J_00]+J_00, K_00: new value, J_00: old value, K_0: new value of K_00's parent

	{

		int id=*node_id0++;

		int id_=id*nr_plane;

		int parent=m_mst_parent[id];

		int parent_=parent*nr_plane;



		double*value_parent=&(cost[parent_]);//K_0

		double*value_current=&(cost_backup[id_]);//J_00

		double*value_final=&(cost[id_]);

		double weight=m_table[m_mst_weight[id]];//f(0,00)



		for(int k=0;k<nr_plane;k++) 

		{

			double vc=*value_current++;

			*value_final++=weight*((*value_parent++)-weight*vc)+vc;///这句怎么搞？

		}

		//printf("Final: [id-value-weight]: [%d - %3.3f - %3.3f]\n",id,m_mst_[id].value_sum_aggregated_from_parent_to_child,m_mst_[id].weight_sum_aggregated_from_parent_to_child);

	}

	return(0);

}



// Fills m_cost_vol[y][x][i] for every pixel (y,x) and every plane i with a
// blend of a truncated colour difference and a truncated gradient difference
// between `left` and `right` shifted right by i pixels.
//
// Parameters:
//   left, right - images indexed as [y][x][c] with c in 0..2.
//
// For each plane i the right image and its gradient are copied i columns to the
// right into m_image_shifted / m_gradient_shifted; the first i columns are
// filled with the value of column 0 of the right image.
void qx_nonlocal_cost_aggregation::matching_cost_from_color_and_gradient(unsigned char ***left,unsigned char ***right)
{
	image_zero(m_cost_vol,m_h,m_w,m_nr_plane);
	compute_gradient(m_gradient_left,left);
	compute_gradient(m_gradient_right,right);

	for(int i=0;i<m_nr_plane;i++)
	{
		// Clamp the shift to the row width: for i>=m_w the unclamped code passed
		// a non-positive length (m_w-i) to the copies and filled columns past the
		// end of the row.
		const int shift=(i<m_w)?i:m_w;
		const int copy_w=m_w-shift;
		for(int y=0;y<m_h;y++) //shift the right image by `shift` pixels
		{
			if(copy_w>0)
			{
				image_copy((&m_image_shifted[y][shift]),right[y],copy_w,3);
				memcpy(&(m_gradient_shifted[y][shift]),m_gradient_right[y],sizeof(float)*copy_w);
			}
			// Columns uncovered by the shift repeat the right image's first column.
			for(int x=0;x<shift;x++)
			{
				qx_memcpy_u3(m_image_shifted[y][x],right[y][0]);
				m_gradient_shifted[y][x]=m_gradient_right[y][0];
			}
		}
		for(int y=0;y<m_h;y++) for(int x=0;x<(m_w);x++)
		{
			// Mean absolute colour difference over the 3 channels, truncated.
			double cost=0;
			for(int c=0;c<3;c++) cost+=abs(left[y][x][c]-m_image_shifted[y][x][c]);
			cost=min(cost/3,m_max_color_difference);
			// NOTE(review): the gradient buffers look like float (see the
			// sizeof(float) copy above); make sure the floating-point abs
			// overload from <cmath> is the one selected here, not the integer
			// abs from <cstdlib>, which would truncate the difference.
			double cost_gradient=min((double)abs(m_gradient_left[y][x]-m_gradient_shifted[y][x]),m_max_gradient_difference);
			m_cost_vol[y][x][i]=m_weight_on_color*cost+m_weight_on_color_inv*cost_gradient;
		}
	}
}

...全文

374 23 打赏收藏转发到动态举报

写回复

用AI写文章

23 条回复

切换为时间正序

请发表友善的回复…

发表回复

赵4老师 2016-01-27

打赏
举报

班门弄斧一下哈：

register double*value_parent=&(cost[parent_]);//K_0
register double*value_current=&(cost_backup[id_]);//J_00
register double*value_final=&(cost[id_]);
register double weight=m_table[m_mst_weight[id]];//f(0,00)
……
register double vc=*value_current++;
            *value_final++=weight*((*value_parent++)-weight*vc)+vc;///这句怎么搞？

赵4老师 2016-01-27

打赏
举报

引用 21 楼 wangyaninglm 的回复:

[quote=引用 20 楼 zhao4zhong1 的回复:] [quote=引用 15 楼 wangyaninglm 的回复:] [quote=引用 12 楼 zhao4zhong1 的回复:] 请楼主每次提到“快”、“慢”等字眼时，都将我三楼回复的那句话默念至少三遍！

赵老师，我真的每次都是release而且有profile 啊。。。。[/quote] 那我问你你知道耗时最多的那几条语句在第多少行到第多少行吗？[/quote] 你看赵老师你都没认真看截图，我截图里面有呢。。。[/quote]

还真有！

shiter 2016-01-27

打赏
举报

引用 20 楼 zhao4zhong1 的回复:

[quote=引用 15 楼 wangyaninglm 的回复:] [quote=引用 12 楼 zhao4zhong1 的回复:] 请楼主每次提到“快”、“慢”等字眼时，都将我三楼回复的那句话默念至少三遍！

赵老师，我真的每次都是release而且有profile 啊。。。。[/quote] 那我问你你知道耗时最多的那几条语句在第多少行到第多少行吗？[/quote] 你看赵老师你都没认真看截图，我截图里面有呢。。。

赵4老师 2016-01-27

打赏
举报

引用 15 楼 wangyaninglm 的回复:

[quote=引用 12 楼 zhao4zhong1 的回复:] 请楼主每次提到“快”、“慢”等字眼时，都将我三楼回复的那句话默念至少三遍！

赵老师，我真的每次都是release而且有profile 啊。。。。[/quote] 那我问你你知道耗时最多的那几条语句在第多少行到第多少行吗？

日立奔腾浪潮微软松下联想 2016-01-26

打赏
举报

引用 11 楼 wangyaninglm 的回复:

[quote=引用 10 楼 DelphiGuy 的回复:] 可以考虑用intel C++开avx优化试一试，VC++也可以，但是优化水平比intel C++要落后一代，VC++ 2015的优化水平大致相当于intel C++ XE 2013的层次。

高手优化代码这方面的书给我推荐一个吧，就c++代码优化，你说的vs2015开avx是要重写代码还是编译器有相关选项呢？就是我还发现一个问题，vs2015的代码release跑起来居然比vs2010 的release慢。。。同一台机器。。。[/quote] [quote=引用 11 楼 wangyaninglm 的回复:] 代码优化的书不少，但是大部分还是谈通用的优化原则、算法，像《高效程序的奥秘》这类书，针对具体架构优化的方法，还得看处理器厂家的资料，《Intel 64 and IA-32 Architectures Optimization Reference Manual》这种。不用重写代码，就是/arch:AVX、/arch:AVX2这些编译选项，但是VC的优化水平还是比intel c++差不少，经常是你开了特定指令集优化，但是它并没有用到。某些代码编译后比2010编译的代码慢的问题在2013上就有，看反汇编却没有明显的差异。

日立奔腾浪潮微软松下联想 2016-01-26

打赏
举报

引用 11 楼 wangyaninglm 的回复:

[quote=引用 10 楼 DelphiGuy 的回复:] 可以考虑用intel C++开avx优化试一试，VC++也可以，但是优化水平比intel C++要落后一代，VC++ 2015的优化水平大致相当于intel C++ XE 2013的层次。

高手优化代码这方面的书给我推荐一个吧，就c++代码优化，你说的vs2015开avx是要重写代码还是编译器有相关选项呢？就是我还发现一个问题，vs2015的代码release跑起来居然比vs2010 的release慢。。。同一台机器。。。[/quote] 前面的回复怎么乱套了。代码优化的书不少，但是大部分还是谈通用的优化原则、算法，像《高效程序的奥秘》这类书，针对具体架构优化的方法，还得看处理器厂家的资料，《Intel 64 and IA-32 Architectures Optimization Reference Manual》这种。不用重写代码，就是/arch:AVX、/arch:AVX2这些编译选项，但是VC的优化水平还是比intel c++差不少，经常是你开了特定指令集优化，但是它并没有用到。某些代码编译后比2010编译的代码慢的问题在2013上就有，看反汇编却没有明显的差异。

赵4老师 2016-01-26