580
社区成员
发帖
与我相关
我的任务
分享
//#include "hash.h"
#include <stdio.h>
#include <string.h>
#define MS 32
#define BW 8
#define BL 32
#define BI 128
__device__ __constant__ char _hex[] = "0123456789ABCDEF";
__global__ void Hash(char* ,char* );
__device__ void OutputFunction(uint32_t*,uint32_t*);
__device__ void InputFunction(uint32_t*,uint32_t*,uint32_t*);
__device__ void RoundFunction(uint32_t*,uint32_t*);
__device__ char hex(int*);
__device__ void stringtohex_BE(char*, char*);
__device__
uint32_t ROR(uint32_t x, int y){
int y_mod = ((y & 0x1F) + 32) & 0x1F;
return ROR32(x, y_mod);
}
__device__
inline unsigned int index2(unsigned int i, unsigned int j){
return (unsigned int) (i*BW+j);
}
__device__
char hex(int nibble){
return _hex[nibble];
}
__device__
void stringtohex_BE(char* in, char* out){
int j=0;
for(int i=0;i<64;i+=2)
{
out[i] = hex((in[j] & 0xF0) >> 4);
out[i+1]= hex((in[j] & 0x0F) >> 0);
j++;
}
out[64]='\0';
return;
}
__device__
bool check_hash(char* hash){
//check if first character is a zero
for(int i=0;i<LEADING_ZEROES;i++)
if (hash[i]!='0')
return false;
return true;
}
//note: output must be 32+1 chars (+1 for termination of string)
__global__
void Hash(char* input, char* nonce)
{
char nonce_[62]={'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',
'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z',
'0','1','2','3','4','5','6','7','8','9'};
char output[65];
char output_str[65];
int tx = threadIdx.x;
//uint32_t a[MS];
//uint32_t b[BL*BW];
uint32_t in[BW];
uint32_t out[2];
input[0]=nonce_[tx];
//uint32_t inputSize=(uint32_t)strlen(input); INPUT_SIZE
uint32_t inputSize=(uint32_t)(INPUT_SIZE+NONCE_SIZE);
//init with zeros
/*for(unsigned int i=0; i<MS; i++)
a[i] = 0;
for(unsigned int i=0; i<BL*BW; i++)
b[i] = 0;*/
uint32_t a[MS]={0};
uint32_t b[BL*BW]={0};
unsigned int p = 0;
while(p+sizeof(uint32_t)*BW <=inputSize) {
for(unsigned int q=0; q<BW; q++) {
in[q] = 0;
for(unsigned int w=0; w<sizeof(uint32_t); w++)
in[q] |= (uint32_t)((unsigned char)(input[p+q*sizeof(uint32_t)+w])) << (8*w);
}
p += sizeof(uint32_t)*BW;
InputFunction(in,a,b);
RoundFunction(a,b);
}
//uint32_t test[BW+1]={};
//padding
//char last_block[(BW+1)*sizeof(uint32_t)/sizeof(char)];
//for(int i=0;i<((BW+1)*sizeof(uint32_t)/sizeof(char));i++)
// last_block[i]=0;
char last_block[(BW+1)*sizeof(uint32_t)/sizeof(char)]={0};
for(uint32_t i=0;i<inputSize-p;i++)
last_block[i]=input[p+i];
last_block[inputSize-p]=(char) 0x01;
for(unsigned int q=0; q<BW; q++) {
in[q] = 0;
for(unsigned int w=0; w<sizeof(uint32_t); w++)
in[q] |= (uint32_t)((unsigned char)(last_block[q*sizeof(uint32_t)+w])) << (8*w);
}
//free(last_block);
InputFunction(in,a,b);
RoundFunction(a,b);
//do some iterations without new input
for(uint32_t i=0; i<BI; i++)
RoundFunction(a,b);
//collect 32 output characters
for(uint32_t i=0;i<32/(2*sizeof(uint32_t));i++){
RoundFunction(a,b);
OutputFunction(out,a);
for(unsigned int q=0; q<2; q++)
for(unsigned int w=0; w<sizeof(uint32_t); w++)
output[i*sizeof(uint32_t)*2+q*sizeof(uint32_t)+w] = (char)((out[q] >> (8*w)) & 0xFF);
}
output[32]='\0';
stringtohex_BE(output,output_str);
if (check_hash(output_str))nonce[0]=input[0];
}
__device__
void RoundFunction(uint32_t* a,uint32_t* b)
{
uint32_t q[BW];
for(unsigned int j=0; j<BW; j++)
q[j] = b[index2(BL-1,j)];
for(unsigned int i=BL-1; i>0; i--)
for(unsigned int j=0; j<BW; j++)
b[index2(i,j)] = b[index2(i-1,j)];
for(unsigned int j=0; j<BW; j++)
b[index2(0,j)] = q[j];
for(unsigned int i=0; i<12; i++)
b[index2(i+1,i%BW)] ^= a[i+1];
uint32_t A[MS];
for(unsigned int i=0; i<MS; i++)
A[i] = a[i]^(a[(i+1)%MS]|(~a[(i+2)%MS]));
for(unsigned int i=0; i<MS; i++)
a[i] = ROR(A[(7*i)%MS], i*(i+1)/2);
for(unsigned int i=0; i<MS; i++)
A[i] = a[i]^a[(i+1)%MS]^a[(i+4)%MS];
A[0] ^= 1;
for(unsigned int i=0; i<MS; i++)
a[i] = A[i];
for(unsigned int j=0; j<BW; j++)
a[j+13] ^= q[j];
}
__device__
void InputFunction(uint32_t* in,uint32_t* a,uint32_t* b)
{
for(unsigned int j=0; j<BW; j++)
a[j+16] ^= in[j];
for(unsigned int j=0; j<BW; j++)
b[index2(0,j)] ^= in[j];
}
__device__
void OutputFunction(uint32_t* out,uint32_t* a)
{
for(unsigned int j=0; j<2; j++)
out[j] = a[j+1];
}
kernelF<<<(1,1),(16,16)>>>(A);
__device__ kernelF(A){
__shared__ smem[16][16];
i = threadIdx.y;
j = threadIdx.x;
smem[i][j] = A[i][j]; // load to smem
A[i][j] = ( smem[i-1][j-1] + smem[i-1][j] +
… + smem[i+1][i+1] ) / 9;
}
这种方法?请问有其他的解决方法吗?在无法提高cuda版本的情况下,因为是在服务器上修改practice,所以我不能提高cuda版本,只能单纯的从原有代码进行修改。
尝试过上边的这种,提示的是a __device__ function call cannot be configured
然后昨晚还尝试把main里边的nonce用thread给device函数 例如最后边的output使用,写成int j=threadIdx.x;
if(j<2)
out[j] = a[j+1];
或者在main里边cudaMemcpy(input_d, input, (INPUT_SIZE+NONCE_SIZE+1)*sizeof(char),
cudaMemcpyHostToDevice);
cudaMemcpy(nonce_d, nonce, sizeof(char),
cudaMemcpyHostToDevice);
//calculate hash
Hash<<<1,64>>>(input_d, nonce_d);
cudaMemcpy(input, input_d, (INPUT_SIZE+NONCE_SIZE+1)*sizeof(char),
cudaMemcpyDeviceToHost);
cudaMemcpy(nonce, nonce_d, sizeof(char),
cudaMemcpyDeviceToHost);
把Hash<<<1,62>>>改写成1,64然后 if(tx<62)
input[0]=nonce_[tx];
后边device的output改成大于62也不可以。。不知道可以不可以以这种思路修改。这样出来的结果是哈希值不对。