cublas矩阵维度错误造成的内存错误
小可这里有一个小程序,用 cuBLAS 计算矩阵乘以向量。不知道是何原因,运行时内存报错,还请高手指点,多谢!
//CUDA编程学习 2017 10 28
#include<cuda.h>
#include "cuda_runtime.h"
#include "cublas_v2.h"
#include <iostream>
using namespace std;
using namespace std;
int main()
{
const float alpha = 2.5;
//矩阵维度(未转置)
const int num_rows = 3;
const int num_columns = 4;
const size_t elem_size = sizeof(float);
cublasHandle_t handle;
cublasCreate(&handle);
//分配主机内存
float * host_matrix_ptr;
cudaMallocHost((void**)&host_matrix_ptr, num_rows*num_columns*elem_size);
float * host_vector_ptr;
cudaMallocHost((void**)&host_vector_ptr, num_columns*elem_size);
float * host_CalcResult_ptr;
cudaMallocHost((void**)& host_CalcResult_ptr, num_rows*elem_size);
//分配设备内存
float * dev_matrix_ptr;
cudaMalloc((void **)&dev_matrix_ptr, num_rows*num_columns*elem_size);
float * dev_vector_ptr;
cudaMalloc((void **)&dev_vector_ptr, num_columns*elem_size);
float * dev_CalcResult_ptr;
cudaMalloc((void **)&dev_CalcResult_ptr, num_rows*elem_size);
//填充主机矩阵向量内存
/*float a[3][4] = { { 2,5,3,6 },{ 1,7,2,4 },{ 6,9,3,8 } };
float v[] = { 3,5,8,9 };
for (int w = 0; w < num_columns; w++) { host_vector_ptr[w] = v[w]; cout << host_vector_ptr[w] << endl; }
for (int m = 0; m < num_rows; m++)
{
for (int n = 0; n < num_columns; n++)
{
*(host_matrix_ptr + (m*num_columns) + n) = a[m][n];
}
}*/
float v[] = { 3,5,8,9 };
for (int w = 0; w < num_columns; w++) { host_vector_ptr[w] = v[w]; cout << host_vector_ptr[w] << endl; }
float a[] = { 2,5,3,6,1,7,2,4,6,9,3,8};
for(int r= 0;r<num_rows*num_columns;r++){host_matrix_ptr[r]=a[r];}
//从主机拷贝内存到设备
cublasSetMatrix(num_rows, num_columns, elem_size, host_matrix_ptr, num_rows, dev_matrix_ptr, num_rows);
cublasSetVector(num_columns, elem_size, host_vector_ptr, 1, dev_vector_ptr, 1);
for(int c=0;c<num_rows*num_columns;c++){cout<<&dev_matrix_ptr[c]<<endl;}
//执行计算
cublasSgemv(handle, CUBLAS_OP_T, num_columns, num_columns, &alpha, dev_matrix_ptr, num_columns, dev_vector_ptr, 1, 0, dev_CalcResult_ptr, 1);
//将结果传回主机
cublasGetVector(num_rows, elem_size, dev_CalcResult_ptr, 1, host_CalcResult_ptr, 1);
for (int s = 0; s < num_rows; s++) { cout << host_CalcResult_ptr[s] << endl; }
cudaFree(dev_matrix_ptr);
cudaFree(dev_vector_ptr);
cudaFree(dev_CalcResult_ptr);
free(host_matrix_ptr);
free(host_vector_ptr);
free(host_CalcResult_ptr);
cublasDestroy(handle);
return 0;
}