有关C++AMP程序换了平台无法运行的问题

isaaczhangz 2013-12-23 06:55:38

我是最近由于工作需要赶鸭子上架学编程的。上周花了一周时间在我的Surface Pro 2上编了个有关热传导的实验程序，确认能够良好运行。回家后将整个project拷贝到两台使用AMD处理器和显卡的机器上，却无法运行。两台AMD机器显示的错误信息是一样的：如果直接执行exe程序，错误是R6010；如果在调试模式下执行，一般是在执行完throw.cpp后出错。特征是连第一个矩阵都没生成就挂了。求各位大侠解惑。源码有点乱，新手改过无数次，请各位大侠原谅哈。
#include <iostream>
#include <amp.h>
#include <vector>
#include <math.h>
using namespace std;
using namespace concurrency;
//void meshCreating() {
// Fills the per-step heating field dT on the accelerator.
//
// A cell receives deltaT when its second index lies in 1..nLaser and its first
// index is neither 0 nor hei + 1; every other cell is set to zero. The view is
// synchronized back to its source before returning.
//
//   dT      field to fill; every element is overwritten
//   deltaT  value written to heated cells
//           (original note: dT = q/V/cV = (dE/dS)*A*V/V/cV = (dE/dS)*A/cV)
//   nLaser  largest second-axis index that is heated
//   hei     first-axis indices 0 and hei + 1 are excluded from heating
void HeatingWithAMP(array_view<float, 3> &dT, float deltaT, int nLaser, int hei){
    parallel_for_each(
        dT.extent,
        [=](index<3> cell) restrict(amp){
            const int layer = cell[0];
            const int ring = cell[1];
            // Cells outside the heated band, or on the first/last layer, get no heat.
            if(ring <= 0 || ring > nLaser || layer * (hei + 1 - layer) == 0){
                dT[cell] = 0;
            }
            else{
                dT[cell] = deltaT;
            }
        }
    );
    dT.synchronize();
}

void ParaComputingWithAMP(array_view<float, 1> &deltaR, array_view<float, 3> ¶, int hei, int rii, int ang, float deHeight, float cV, float lamdaDt, float hDt) {
parallel_for_each(
para.extent,
[=](index<3> idx) restrict(amp){
int i = idx[0];
int j = idx[1];
int k = idx[2];
if(k == (k+1)/2*2-1){
if(j == (j+1)/2*2-1){
if(i == (i+1)/2*2-1){
para(idx) = cV * 3.14159265 * deHeight * (deltaR((j+1)/2) * deltaR((j+1)/2) - deltaR((j-1)/2) * deltaR((j-1)/2)) / ang;//cV*V
}
if(i==i/2*2){
if(i*(hei*2-i)==0){//hei方向牛顿冷却定律fi=h*dt*A
para(idx) = hDt * 3.14159265 * (deltaR((j+1)/2) * deltaR((j+1)/2) - deltaR((j-1)/2) * deltaR((j-1)/2)) / ang; //htA
}
else{
para(idx) = lamdaDt * 3.14159265 * (deltaR((j+1)/2) * deltaR((j+1)/2) - deltaR((j-1)/2) * deltaR((j-1)/2)) / ang / deHeight; //hei向传导
}
}
}
if(j==j/2*2){
if(i==(i+1)/2*2-1){
if(rii*2-j==0){//rii方向牛顿冷却定律fi=h*dt*A
para(idx) = hDt * 6.28318531 * deltaR(rii) * deHeight;
}
else{
if(j==0){
para(idx) = 0;
}
else{
para(idx) = lamdaDt * deltaR(j/2) * 12.56637061 * deHeight / ang / (deltaR(j/2+1) - deltaR(j/2-1));//rii向传导
}
}
}
}
}
if(k/2*2==k){
if((i+1)/2*2-1==i){
if((j+1)/2*2-1==j){
if(k*(ang*2-k)==0){
para(idx) = 0; //对称性绝热
}
else{
para(idx) = lamdaDt * (deltaR((j+1)/2) - deltaR((j-1)/2)) * deHeight * ang / (deltaR((j+1)/2) + deltaR((j-1)/2)) / 3.14159265;//ang向传导
}

}
}
}
//if(j>0){
// for(int i = 0; i < hei; i++){
// for(int k = 0; k < ang; k++){
// para(i*2+1, j*2+1, k*2+1) = cV * 3.14159265 * deHeight * (deltaR(j) * deltaR(j) - deltaR(j-1) * deltaR(j-1)) / ang;//cV*V
// para(i*2+2, j*2+1, k*2+1) = lamdaDt * 3.14159265 * (deltaR(j) * deltaR(j) - deltaR(j-1) * deltaR(j-1)) / ang / deHeight; //hei向传导
// para(i*2+1, j*2+1, k*2+2) = lamdaDt * (deltaR(j) - deltaR(j-1)) * deHeight * ang / (deltaR(j) + deltaR(j-1)) / 3.14159265;//ang向传导
// para(i*2+1, j*2+2, k*2+1) = lamdaDt * (deltaR(j) + deltaR(j-1)) * 6.28318531 * deHeight / ang / (deltaR(j+1) - deltaR(j-1));//rii向传导
// }
// }
// }
}
);
para.synchronize();
}
void RisingTransmitingWithAMP(array_view<float, 3> &temp, array_view<float, 3> ¶, array_view<float, 3> &dT, array_view<float, 3> &product, concurrency::extent<3> &e) {

//热传导计算
parallel_for_each(
e,
[=](index<3> idx) restrict(amp) {
int hei = idx[0]+1;
int rii = idx[1]+1;
int ang = idx[2]+1;
int hei2 = idx[0] * 2 + 1;
int rii2 = idx[1] * 2 + 1;
int ang2 = idx[2] * 2 + 1;

product(hei, rii, ang) = temp(hei, rii, ang) + (temp(hei-1, rii, ang) - temp(hei, rii, ang)) * para(hei2-1, rii2, ang2) / para(hei2, rii2, ang2)
+ (temp(hei+1, rii, ang) - temp(hei, rii, ang)) * para(hei2+1, rii2, ang2) / para(hei2, rii2, ang2)
+ (temp(hei, rii-1, ang) - temp(hei, rii, ang)) * para(hei2, rii2-1, ang2) / para(hei2, rii2, ang2)
+ (temp(hei, rii+1, ang) - temp(hei, rii, ang)) * para(hei2, rii2+1, ang2) / para(hei2, rii2, ang2)
+ (temp(hei, rii, ang-1) - temp(hei, rii, ang)) * para(hei2, rii2, ang2-1) / para(hei2, rii2, ang2)
+ (temp(hei, rii, ang+1) - temp(hei, rii, ang)) * para(hei2, rii2, ang2+1) / para(hei2, rii2, ang2) + dT(hei, rii, ang);
//T=T0+C1(T1-T0)...=(1-C(1-6))T0+C1T1...

//测试用
//float a = (temp(hei, rii-1, ang) - temp(hei, rii, ang)) * para(hei2, rii2-1, ang2) / para(hei2, rii2, ang2);
//float b = (temp(hei, rii+1, ang) - temp(hei, rii, ang)) * para(hei2, rii2+1, ang2) / para(hei2, rii2, ang2);
//float t1=temp(hei, rii+1, ang);
//float t0=temp(hei, rii, ang);
//float p1=para(hei2, rii2+1, ang2);
//float p0=para(hei2, rii2, ang2);
}
);
product.synchronize();
parallel_for_each(
product.extent,
[=](index<3> idx) restrict(amp) {
temp[idx]=product[idx];
}
);
temp.synchronize();
}

//主函数
void main() {
//输入模型和边界条件
//生成网格（CPU&GPU）
//float rawMatrix[] = { 27.5, 20, 20, 20, 22.5, 35, 0, 0, 0, 25, 35, 0, 0, 0, 25, 35, 0, 0, 0, 25, 35, 0, 0, 0, 25, 22.5, 10, 10, 10, 17.5 };
float height=1;//mm
int hei=5;//手工决定z轴均分5份
int ang=8;//输入2pi角度等分多少份
float riiMax=12.5;//mm
float riiLaser=1;
float len=riiLaser;
int nLaser=3;//手工决定激光辐照部分rii向均分3份
float cV = 670 * 2.2 * pow(10,-6);//cM=0.8kJ/(kg·K);670J/kg.C;密度:2.2g/cm3 =2.2*10e6kg/mm3
float lamda = 0.0014;//1.4W/m.C
float h = 5 * pow(10,-6);//5W/(m2*K)
float dt = 0.001;
float lamdaDt = lamda * dt;
float hDt = h * dt;
float p = 1;//激光功率，单位瓦特
float A = 0.00001;//1%/m
float deltaT = p * dt * A / (3.14159265 * riiLaser * riiLaser * cV);//dT=q/V/cV=(dE/dS)*A*V/V/cV=(dE/dS)A/cV

float dR;
int rii;
for(rii = nLaser; len < riiMax; rii++)
{
dR = 8 * len / ang;
len = len + dR;
}
len = riiLaser;
float* deltaRMatrix;
deltaRMatrix=new float[rii+2];
deltaRMatrix[0] = 0;
for(rii = 0; rii < nLaser; rii++)
{
//deltaR[len] = riiLaser / nLaser;
deltaRMatrix[rii+1] = (rii + 1) * riiLaser / nLaser;
}
for(rii = nLaser; deltaRMatrix[rii] < riiMax; rii++)
{
//deltaR[len] = 8 * rii / ang;
dR = 8 * deltaRMatrix[rii] / ang;
deltaRMatrix[rii+1] = dR + deltaRMatrix[rii];
}
deltaRMatrix[rii] = riiMax;
array_view<float, 1> deltaR(rii+1, deltaRMatrix);
//int scale=4;
//int scale_p=scale*2-3;
//int scale_t=scale*scale*scale;
//int scale_t = 16;
//float* rawMatrix;
//float* paraMatrix;
//float* offset;
//float rawMatrix[1024] = {100};

//rawMatrix=new float[scale_t];
//offset = rawMatrix;
//for(int ijk=0;ijk<scale_t;ijk++)
//{
// *offset=1+float (rand());
// offset++;
//}

//paraMatrix=new float[scale_p];
//offset = paraMatrix;
//for(int ijk=0;ijk<scale_t;ijk++)
//{
// *offset=1+float (rand());
// offset++;
//}

//float* productMatrix;
//productMatrix =new float[scale2];
//for(int ijk1=0;ijk1<scale2;ijk1++)
//{
// productMatrix[ijk1]=0;
//}
int t = (hei+2) * (rii+2) * (ang+2);
float* offset;
float* tempMatrix;
tempMatrix=new float[t];
float* dTMatrix;
dTMatrix=new float[t];
offset = tempMatrix;
for(int ijk=0;ijk<t;ijk++)
{
*offset=0;
offset++;
}
//float tempMatrix[*t] = {t};
float* productMatrix;
productMatrix=new float[t];
offset = productMatrix;
for(int ijk=0;ijk<t;ijk++)
{
*offset=0;
offset++;
}
float* paraMatrix;
paraMatrix=new float[(hei*2+1) * (rii*2+1) * (ang*2+1)];
array_view<float, 3> para(hei*2+1, rii*2+1, ang*2+1, paraMatrix);

ParaComputingWithAMP(deltaR, para, hei, rii, ang, height/hei, cV, lamdaDt, hDt);

//以下调试用
for (int i = 0; i < hei*2+1; i++) {
for (int j = 0; j < rii*2+1; j++) {
//std::cout << productMatrix[row*3 + col] << " ";
//for (int k = 0; k < ang*2+1; k++) {
std::cout << para(i, j, 1) << " ";
//}
}
std::cout << "\n";
}
getchar();
array_view<float, 3> temp(hei+2, rii+2, ang+2, tempMatrix);
array_view<float, 3> product(hei+2, rii+2, ang+2, productMatrix);
concurrency::extent<3> e(hei, rii, ang);
//以下调试用
for (int i = 0; i < hei+2; i++) {
for (int j = 0; j < rii+2; j++) {
//std::cout << productMatrix[row*3 + col] << " ";
// for (int k = 0; k < ang+2; k++) {
std::cout << temp(i, j, 1) << " ";
//}
}
std::cout << "\n";
}
getchar();
for (int i = 0; i < hei+2; i++) {
for (int j = 0; j < rii+2; j++) {
//std::cout << productMatrix[row*3 + col] << " ";
// for (int k = 0; k < ang+2; k++) {
std::cout << product(i, j, 1) << " ";
//}
}
std::cout << "\n";
}
getchar();
//生成加热矩阵
array_view<float, 3> dT (hei+2, rii+2, ang+2, dTMatrix);
HeatingWithAMP(dT, deltaT, nLaser, hei);
//以下测试用
for (int i = 0; i < hei+2; i++) {
for (int j = 0; j < rii+2; j++) {
//std::cout << productMatrix[row*3 + col] << " ";
// for (int k = 0; k < ang+2; k++) {
std::cout << dT(i, j, 1) << " ";
//}
}
std::cout << "\n";
}
getchar();

for(len = 0; len < 10; len++){
for(int i = 0; i < 1000; i++){
RisingTransmitingWithAMP(temp, para, dT, product, e);
}
for (int i = 0; i < hei+2; i++) {
for (int j = 0; j < rii+2; j++) {
//std::cout << productMatrix[row*3 + col] << " ";
//for (int k = 0; k < ang+2; k++) {
std::cout << temp(i, j, 1) << " ";
//}
}
std::cout << "\n";
}
getchar();
//getc(stdin);
}
}

...全文

149 4 打赏收藏转发到动态举报

写回复

用AI写文章

4 条回复

切换为时间正序

请发表友善的回复…

发表回复

isaaczhangz 2013-12-29

打赏
举报

部分找到了原因，居然是因为在AMD平台上，parallel_for_each函数内不能直接与小数字面量相乘，如：`para(idx) = cV * 3.14159265;`（无法运行），而必须写作：`float pi = 3.14159265; para(idx) = cV * pi;` 这样就可以运行了。有人能告诉我这是为什么么？

ri_aje 2013-12-29

打赏
举报

引用 3 楼 isaaczhangz 的回复:

部分找到了原因，居然是因为在AMD平台上，parallel_for_each函数内不能直接与小数字面量相乘，如：`para(idx) = cV * 3.14159265;`（无法运行），而必须写作：`float pi = 3.14159265; para(idx) = cV * pi;` 这样就可以运行了。有人能告诉我这是为什么么？

有可能是 double support 的问题。你把 `float pi = 3.14159265;` 换成 `double pi = 3.14159265;`，看看是不是和 `cV * 3.14159265` 出同样的问题。也可以看看这个： http://blogs.msdn.com/b/nativeconcurrency/archive/2012/02/07/double-precision-support-in-c-amp.aspx 。

isaaczhangz 2013-12-28

打赏
举报

引用 1 楼 ri_aje 的回复:

第一次出错到底是在那行？

连main函数都没进吧？所有设定的中断点都提示“当前不会命中中断点，还没加载任何符号”。出错信息为“Unhandled exception at at 0x00007FFE2DE0AB78 in Heattranmiting.exe: Microsoft C++ exception: Concurrency::runtime_exception at memory location 0x0000001867E1E4D8.”并自动弹开的是系统引用的throw.cpp文件。我在这个文件里设置断点，在该文件的150行到152行反括号之间出错。

ri_aje 2013-12-24