64,651
社区成员
发帖
与我相关
我的任务
分享
#include <iostream>
#include <fstream>
#include <cstring>
#include <string>
#include <sstream>
#include <cstdlib>
#include <cstdio>
#include <sys/time.h>
#include <math.h>
#include <openssl/rand.h>
#include <vector>
#include "md5.h"
#define NUM_FILE 2000
#define NUM_WORD 3000
using namespace std;
// Input/output streams shared by main() and createtable(); main() reopens them for every file.
ifstream infile;
ofstream outfile;
// Table A: 16384 rows x 1400 slots of hex-string entries, filled and written out by createtable().
// NOTE(review): this is 16384*1400 std::string objects in one global array; at a typical
// 32 bytes per std::string that is roughly 700 MB - confirm the target platform can hold it.
string A[16384][1400]={};
// flag[r] is the word that owns row r of A; an empty string marks an unused row.
string flag[16384]={};
// word[k] is the k-th distinct word of the current file; wnum[k] counts its occurrences so far.
string word[4000]={};
int wnum[4000]={0};
// Scratch buffers holding the message text handed to hmac_md5_hash().
unsigned char c1[1000],c2[1000],c3[1000];
// Buffers that receive the digests written by hmac_md5_hash().
unsigned char md4[1600],md5[1600],md3[1600];
// Fixed 8-byte HMAC keys, one per keyed function of the scheme.
unsigned char K1[8] = {0x2d, 0x63, 0x8c, 0x17, 0x17, 0x81, 0x67, 0x1f}; // key for f
unsigned char K2[8] = {0xb0, 0xc9, 0x5a, 0xbf, 0x33, 0x40, 0xe3, 0x4d}; // key for g
unsigned char K3[8] = {0x41, 0xb3, 0x30, 0xaa, 0x5f, 0x84, 0xd7, 0x3f}; // key for rho
unsigned char K4[8] = {0x0d, 0x53, 0x1a, 0xf4, 0xb0, 0x61, 0x27, 0x9c}; // key for phi
unsigned char K5[8] = {0xd7, 0x86, 0x3c, 0x72, 0x00, 0xcc, 0x1c, 0xed}; // key for H
// Disabled key for the "random" function:
//K6[8]={0xce,0x61,0xd1,0x3c,0xa1,0xd1,0xc3,0x2b};//random
// Returns a random string of 8 lowercase hexadecimal digits (a pseudo-MD5 fragment used as the r value).
// Each digit is drawn with rand(), so the sequence depends on the current srand() seed.
string randstr()
{
    static const char kHexDigits[] = "0123456789abcdef";
    const int kAlphabetSize = 16;
    const int kOutputLength = 8;

    string result;
    for (int produced = 0; produced != kOutputLength; ++produced)
    {
        const int pick = rand() % kAlphabetSize;
        result += kHexDigits[pick];
    }
    return result;
}
// Reports that one of the fixed-size global tables is full and stops the program.
static void failTableFull(const char *tableName, int identifier)
{
    fprintf(stderr, "createtable: %s is full while processing file %d\n", tableName, identifier);
    exit(EXIT_FAILURE);
}

// Reads the next maximal run of ASCII letters from `in` into `out`, lower-cased.
// Returns false when the stream ends before any letter is found.
static bool readNextWord(ifstream &in, string &out)
{
    out.clear();
    int ch; // int rather than char, so EOF cannot be confused with a data byte
    while ((ch = in.get()) != EOF)
    {
        const bool isUpper = (ch >= 'A' && ch <= 'Z');
        const bool isLower = (ch >= 'a' && ch <= 'z');
        if (isUpper || isLower)
            out += static_cast<char>(isUpper ? ch + 32 : ch);
        else if (!out.empty())
            return true; // the delimiter that ended the word has been consumed
    }
    return !out.empty(); // a word that runs up to end-of-file still counts
}

// Calls hmac_md5_hash on `text` with the given key and stores the digest in `digest`.
static void hmacOfString(const string &text, unsigned char *key, int keyLen, unsigned char *digest)
{
    // A private NUL-terminated copy sized to the message: no fixed-size scratch buffer to overflow.
    vector<unsigned char> message(text.begin(), text.end());
    message.push_back('\0');
    hmac_md5_hash(message.data(), text.size(), key, keyLen, digest);
}

// Formats the first `byteCount` digest bytes as lowercase hex, two digits per byte.
static string digestHexPrefix(const unsigned char *digest, int byteCount)
{
    string hex;
    char pair[3];
    for (int k = 0; k < byteCount; ++k)
    {
        snprintf(pair, sizeof(pair), "%2.2x", digest[k]);
        hex += pair;
    }
    return hex;
}

// phi(w || id): the first two digest bytes read as a 16-bit number, reduced modulo rowCount.
static int hashRow(const string &w, int identifier, int rowCount)
{
    hmacOfString(w + std::to_string(identifier), K4, 8, md4);
    const int value = (md4[0] << 8) | md4[1];
    return value % rowCount;
}

// Builds the table for one input file and writes it to `outfile`.
//
// infile     - text to index; words are maximal runs of ASCII letters, compared case-insensitively.
// outfile    - receives one line per row of A, entries separated by a single space.
// identifier - file id mixed into the row hash and into the rho key derivation.
//
// The first word is stored as r || H(w,1) with r = randstr(). Every later word q that follows a
// word p is stored, in q's row, as h_sk(H(p,count_p)) || H(q,count_q) with sk = rho(p || q || id).
// Slots that stay empty are written as random hex so real entries cannot be told apart by position.
// The global tables A, flag, word and wnum are left empty/zero on return, ready for the next file.
// If the word table, the row table or a row's slots run out, the program exits with an error.
void createtable(ifstream &infile, ofstream &outfile,int identifier)
{
    const int kRows = sizeof(A) / sizeof(A[0]);
    const int kSlots = sizeof(A[0]) / sizeof(A[0][0]);
    const int kMaxWords = sizeof(word) / sizeof(word[0]);

    string current;
    if (readNextWord(infile, current))
    {
        // First word: it has no predecessor, so its entry is r || H(w, 1).
        int prevIdx = 0;
        word[prevIdx] = current;
        wnum[prevIdx]++;
        const int firstRow = hashRow(current, identifier, kRows);
        flag[firstRow] = current;
        const string r = randstr();
        hmacOfString(word[prevIdx] + std::to_string(wnum[prevIdx]), K5, 8, md5);
        A[firstRow][0] = r + digestHexPrefix(md5, 4);

        while (readNextWord(infile, current))
        {
            // Find the word in the table of distinct words, or append it.
            int curIdx = 0;
            while (curIdx < kMaxWords && !word[curIdx].empty() && word[curIdx] != current)
                ++curIdx;
            if (curIdx == kMaxWords)
                failTableFull("word table", identifier);
            if (word[curIdx].empty())
                word[curIdx] = current;
            wnum[curIdx]++;

            // Find the row owned by this word: linear probing that wraps at the end of the table.
            int row = hashRow(current, identifier, kRows);
            int probes = 0;
            while (!flag[row].empty() && flag[row] != current)
            {
                row = (row + 1) % kRows;
                if (++probes == kRows)
                    failTableFull("row table", identifier);
            }
            flag[row] = current;

            // H(p, count_p); when q == p the count was already incremented above, as before.
            hmacOfString(word[prevIdx] + std::to_string(wnum[prevIdx]), K5, 8, md5);
            const string prevTag = digestHexPrefix(md5, 4);
            // sk = rho(p || q || id); its first 8 bytes key the masking HMAC.
            hmacOfString(word[prevIdx] + word[curIdx] + std::to_string(identifier), K3, 8, md3);
            hmacOfString(prevTag, md3, 8, md5);
            const string maskedPrev = digestHexPrefix(md5, 4);
            // H(q, count_q)
            hmacOfString(word[curIdx] + std::to_string(wnum[curIdx]), K5, 8, md5);
            const string curTag = digestHexPrefix(md5, 4);

            // Append to the first free slot of the row instead of overwriting slot 0.
            int slot = 0;
            while (slot < kSlots && !A[row][slot].empty())
                ++slot;
            if (slot == kSlots)
                failTableFull("table row", identifier);
            A[row][slot] = maskedPrev + curTag;

            prevIdx = curIdx;
        }
    }

    // Write the table; empty slots are padded with 16 random hex digits. Each used slot is
    // cleared as it is written, so A is empty again for the next file.
    for (int i = 0; i < kRows; ++i)
    {
        for (int j = 0; j < kSlots; ++j)
        {
            if (!A[i][j].empty())
            {
                outfile << A[i][j] << " ";
                A[i][j].clear();
            }
            else
            {
                outfile << randstr() + randstr() << " ";
            }
        }
        outfile << '\n';
    }

    // Reset the per-file state. std::string objects must be cleared, never memset.
    for (int k = 0; k < kRows; ++k)
        flag[k].clear();
    for (int k = 0; k < kMaxWords; ++k)
        word[k].clear();
    memset(wnum, 0, sizeof(wnum));
}//end createtable
// Processes input files 1..NUM_FILE: reads DataSet/12000/<i>, writes the table for it to
// output/12000/<i>, and reports the time spent per file and in total (microseconds).
// Exits with a failure status as soon as an input or output file cannot be opened.
int main()
{
    srand((unsigned)time(0)); // seed once for every randstr() call made during the run
    char inpath[50];
    char outpath[50];
    struct timeval start,end;
    double t=0; // accumulated processing time in microseconds
    for(int i=1; i<=NUM_FILE; i++)
    {
        snprintf(inpath,sizeof(inpath),"DataSet/12000/%d",i);
        infile.open(inpath);
        if(!infile)
        {
            printf("The infile '%d' doesn't exist!\n",i);
            exit(EXIT_FAILURE); // a missing input is an error, so do not report success
        }
        snprintf(outpath,sizeof(outpath),"output/12000/%d",i);
        outfile.open(outpath);
        if(!outfile)
        {
            printf("The outfile '%d' can't be opened!\n",i);
            exit(EXIT_FAILURE);
        }
        printf("id = %d\n", i);
        gettimeofday(&start,NULL);
        createtable(infile, outfile,i);
        infile.close();
        outfile.close();
        gettimeofday(&end,NULL);
        // Computed in double so the seconds term cannot overflow a 32-bit long.
        const double elapsed = 1000000.0*(end.tv_sec-start.tv_sec)+(end.tv_usec-start.tv_usec);
        t += elapsed;
        printf("the time of id %d is %fus \n", i,elapsed);
    }
    cout<<"the time of generating table:" <<t<<"us"<<endl;
    return 0;
}
FILE *fA;fA=fopen("A","rb+");_fseeki64(fA,10000000000i64*sizeof(int),SEEK_SET);fputc(fA,0);//int A[10000000000];
int B;
_fseeki64(fA,9999999999i64*sizeof(int),SEEK_SET);fread(&B,1,sizeof(int),fA);//B=A[9999999999];
_fseeki64(fA,9999999999i64*sizeof(int),SEEK_SET);fwrite(&B,1,sizeof(int),fA);//A[9999999999]=B;
fclose(fA);
#include <memory>
// Heap-allocated replacements for the global tables A, flag and word.
// Each block comes from new[], so the owning shared_ptr is given an array deleter;
// without it the shared_ptr would release the block with plain delete, which is undefined.
shared_ptr<string> internalA(new string[16384*1400], default_delete<string[]>());
// View the flat block as 16384 rows of 1400 strings so that A[row][slot] keeps working.
string (*A)[1400]= reinterpret_cast< string( *)[1400]> (internalA.get());
shared_ptr<string> internalF(new string[16384], default_delete<string[]>());
string * flag = internalF.get();
shared_ptr<string> internalW(new string[4000], default_delete<string[]>());
string * word = internalW.get();
// Fixed-size table declarations (same names and sizes as the first listing's globals).
// A: 16384 rows x 1400 string slots; flag: one string per row of A.
string A[16384][1400]={};
string flag[16384]={};
// word / wnum: 4000 parallel entries (a string and an int counter).
string word[4000]={};
int wnum[4000]={0};
// Fixed-size byte buffers.
unsigned char c1[1000],c2[1000],c3[1000];
unsigned char md4[1600],md5[1600],md3[1600];
#include <array>
// std::array versions of the tables: A is 16384 rows of 1400 strings, indexed as A[row][slot].
// NOTE(review): std::array stores its elements inline, so this presumably needs the same
// amount of storage as the built-in arrays - confirm it actually avoids the original problem.
array<array<string,1400>,16384 > A;
array<string,16384> flag;
array<string,4000> word;
int wnum[4000]={0};
#include <iostream>
#include <fstream>
#include <cstring>
#include <string>
#include <sstream>
#include <cstdlib>
#include <cstdio>
#include <sys/time.h>
#include <math.h>
#include <openssl/rand.h>
#include <array>
#include <vector>
#include "md5.h"
#define NUM_FILE 500
#define NUM_WORD 3000
using namespace std;
// Input/output streams shared by main() and createtable(); main() reopens them for every file.
ifstream infile;
ofstream outfile;
// Table A: 4096 rows x 1400 slots of hex-string entries; main() passes it to createtable().
string A[4096][1400]={};
// flag[r] is the word that owns row r of the table; an empty string marks an unused row.
string flag[4096]={};
// word[k] is the k-th distinct word of the current file; wnum[k] counts its occurrences so far.
string word[4096]={};
int wnum[4096]={0};
// Alphabet used by randstr(): the 16 lowercase hexadecimal digits.
const int LEN = 16;
char g_arrCharElem[LEN] = {'0', '1', '2','3','4','5','6','7','8','9','a','b', 'c', 'd', 'e', 'f'};
// Scratch buffers holding the message text handed to hmac_md5_hash().
unsigned char c1[100],c2[100],c3[100];
// Buffers that receive the digests written by hmac_md5_hash().
unsigned char md4[160],md5[160],md3[160];
// Fixed 8-byte HMAC keys, one per keyed function of the scheme.
unsigned char K1[8] = {0x2d, 0x63, 0x8c, 0x17, 0x17, 0x81, 0x67, 0x1f}; // key for f
unsigned char K2[8] = {0xb0, 0xc9, 0x5a, 0xbf, 0x33, 0x40, 0xe3, 0x4d}; // key for g
unsigned char K3[8] = {0x41, 0xb3, 0x30, 0xaa, 0x5f, 0x84, 0xd7, 0x3f}; // key for rho
unsigned char K4[8] = {0x0d, 0x53, 0x1a, 0xf4, 0xb0, 0x61, 0x27, 0x9c}; // key for phi
unsigned char K5[8] = {0xd7, 0x86, 0x3c, 0x72, 0x00, 0xcc, 0x1c, 0xed}; // key for H
// Disabled key for the "random" function:
//K6[8]={0xce,0x61,0xd1,0x3c,0xa1,0xd1,0xc3,0x2b};//random
// Returns a random string of 16 characters drawn from g_arrCharElem (a pseudo-MD5 value).
// Callers that need the shorter r value take a prefix of the result.
string randstr()
{
    const int kOutputLength = 16;
    string result;
    for (int remaining = kOutputLength; remaining > 0; --remaining)
    {
        const int pick = rand() % LEN;
        result += g_arrCharElem[pick];
    }
    return result;
}
void createtable(ifstream &infile, ofstream &outfile,int identifier,string **A)
{
srand((unsigned)time(0));
string str1,str2,str3,strt;
int i=0,j,t=1,col=0,l;
char p[40],q[40];
char ch;
int m1,m2;
ch=infile.get();//读取一个字符
p[0]='\0';
cout<<"1"<<endl;
while((ch!=EOF)&&(t==1)) //读取第一个单词
{
if((ch>='a'&&ch<='z')||(ch>='A'&&ch<='Z'))//if语句把一个单词读完
{
if(ch<='Z') ch=ch+32; //如果该字母大写则转成小写
p[i]=ch;
i++;
} //end if
else//else语句把单词插入表中
{
if(p[0]=='\0')
{
ch=infile.get();
continue;
}//end if
p[i]='\0';
m1=0;
word[m1]=p;
wnum[m1]++;
//计算第一个单词在表A中的行
str1=word[m1]+std::to_string(identifier);//把单词和文件标识连接起来:w||id
strcpy((char*)c1,str1.data());
hmac_md5_hash(c1,str1.size(),K4,8,md4);//生成单词存储所在的行数,phi值
char tmp1[3]={},buf1[5]={};
for (i = 0; i < 2; i++)//读取md5中的4位十六进制数,设为buf1 进而转成十进制的col,并设M【col】存储该单词的信息
{
sprintf(tmp1,"%2.2x",md4[i]);
strcat(buf1,tmp1);
}
sscanf(buf1,"%x",&col);//把十六进制数转换成十进制
col=col%4096;
flag[col]=p;
//计算在该行内填入的信息:r*||H(word1,1)
str3=randstr();//生成随机的r值
str3=str3.substr(0,8);
str2=word[m1]+std::to_string(wnum[m1]);//把单词和出现次数连接起来
strcpy((char*)c2,str2.data());
hmac_md5_hash(c2,str2.size(),K5,8,md5);
char tmp2[3]={},buf2[9]={};
for (i = 0; i < 4; i++)
{
sprintf(tmp2,"%2.2x",md5[i]);
strcat(buf2,tmp2);
}
string ts= str3+buf2;//ts标识要待插入的信息
//l=0;
/*while(A[col][l]!="\0"&&l<16383)//查找col行第一个非空的位置
{
l++;
}*/
A[col][0]=ts;
i=0;
t++;
}//end else
ch=infile.get();
}// end while
q[0]='\0';
cout<<"2"<<endl;
while(ch!=EOF)//读到文本结束
{
if((ch>='a'&&ch<='z')||(ch>='A'&&ch<='Z'))
{
if(ch<='Z') ch=ch+32;
q[i]=ch;
i++;
ch=infile.get();
} //end if
else//else语句把单词插入表中
{
if(q[0]=='\0')//若读到非字母字符则跳过
{
ch=infile.get();
continue;
}//end if
q[i]='\0';
ch=infile.get();
m2=0;
int wflag=0;
while(word[m2]!=""&&m2<4096)//遍历已有word数组,查看当前单词是否已存储
{
if(word[m2]==q)//若已存储
{
wnum[m2]++;
wflag=1;
break;
}
m2++;
}
if(wflag==0)//若还未存储
{
word[m2]=q;
wnum[m2]++;
}
//计算当前单词在表A中的行
str1=word[m2]+std::to_string(identifier);//把单词和文件标识连接起来:w||id
strcpy((char*)c1,str1.data());
hmac_md5_hash(c1,str1.size(),K4,8,md4);//生成单词存储所在的行数,phi值
char tmp1[3]={},buf1[5]={};
for (i = 0; i < 2; i++)//读取md5中的4位十六进制数
{
sprintf(tmp1,"%2.2x",md4[i]);
strcat(buf1,tmp1);
}
sscanf(buf1,"%x",&col);//把十六进制数转换成十进制
col=col%4096;
wflag=0;
while(flag[col]!="")//
{
if(flag[col]==q)//
{
wflag=1;
break;
}
col++;//
}
if(wflag==0)
{
flag[col]=q;
}
//计算在该行内填入的信息:h(H(p,次数))||H(q,次数)
//先计算H(p,次数)
str2=word[m1]+std::to_string(wnum[m1]);//把单词p和出现次数连接起来
strcpy((char*)c2,str2.data());
hmac_md5_hash(c2,str2.size(),K5,8,md5);//H(p,次数)
char tmpt[3]={},buft[9]={};
for (i = 0; i < 4; i++)
{
sprintf(tmpt,"%2.2x",md5[i]);
strcat(buft,tmpt);//buft=H(p,次数)
}
//再计算h(H(p,次数))
str2=buft;
strt=word[m1]+word[m2]+std::to_string(identifier);//把单词p,q和文件标识连接起来
strcpy((char*)c2,strt.data());
hmac_md5_hash(c2,strt.size(),K3,8,md3);//md3=sk=rho(p||q||id)
strcpy((char*)c2,str2.data());
hmac_md5_hash(c2,str2.size(),md3,8,md5);//h_sk(H(p,次数))
char tmp2[3]={},buf2[17]={};
for (i = 0; i < 4; i++)
{
sprintf(tmp2,"%2.2x",md5[i]);
strcat(buf2,tmp2);//buf2=h(H(p,次数))
}
//计算H(q,次数)
str3=word[m2]+std::to_string(wnum[m2]);//把单词q和出现次数连接起来
strcpy((char*)c3,str3.data());
hmac_md5_hash(c3,str3.size(),K5,8,md5);
char tmp3[3]={},buf3[9]={};
for (i = 0; i < 4; i++)
{
sprintf(tmp3,"%2.2x",md5[i]);
strcat(buf3,tmp3);//buf3=H(q,次数)
}
//连接h(H(p,次数))和H(q,次数)
strcat(buf2,buf3);
string ts=buf2;
l=0;
while(A[col][l]!="\0"&&l<1400)//查找col行第一个非空的位置
{
l++;
}
A[col][l]=ts;
i=0;
t++;
strcpy(p,q);
m1=m2;
q[0]='\0';
}//end else
}//end while
cout<<"now is output"<<endl;
for(i=0;i<4096;i++)//输出到文件中
{
for(j=0;j<1400;j++)
{
if(A[i][j]!="\0")
outfile<<"*"<<A[i][j]<<" ";
else//否则插入随机伪md5值
{
A[i][j]=randstr();//一个randstr的输出是8个十六进制数
outfile<<A[i][j]<<" ";
}
}
outfile<<endl;
outfile<<endl;
}//end for
//清空定义的全局数组
/*array<array<string,4096>,1400> B;
B.swap(A);
array<string,4096> flagt;
flagt.swap(flag);
array<string,4096> wordt;
wordt.swap(word);*/
//memset(A,0,sizeof(A));
memset(flag,0,sizeof(flag));
memset(word,0,sizeof(word));
memset(wnum,0,sizeof(wnum));
}//end createtable
// Processes input files 1..NUM_FILE: reads DataSet/12000/<i>, writes the table for it to
// output/12000/<i>, and prints the accumulated processing time (microseconds) after each file.
// Exits with a failure status as soon as an input or output file cannot be opened.
int main()
{
    char inpath[50];
    char outpath[50];
    struct timeval start,end;
    double t=0; // accumulated processing time in microseconds
    for(int i=1; i<=NUM_FILE; i++)
    {
        snprintf(inpath,sizeof(inpath),"DataSet/12000/%d",i);
        infile.open(inpath);
        if(!infile)
        {
            printf("The infile '%d' doesn't exist!\n",i);
            exit(EXIT_FAILURE); // a missing input is an error, so do not report success
        }
        snprintf(outpath,sizeof(outpath),"output/12000/%d",i);
        outfile.open(outpath);
        if(!outfile)
        {
            printf("The outfile '%d' can't be opened!\n",i);
            exit(EXIT_FAILURE);
        }
        printf("id = %d\n", i);
        gettimeofday(&start,NULL);
        // The global table A is handed over explicitly; createtable fills and writes it.
        createtable(infile, outfile,i,A);
        infile.close();
        outfile.close();
        gettimeofday(&end,NULL);
        // Computed in double so the seconds term cannot overflow a 32-bit long.
        t += 1000000.0*(end.tv_sec-start.tv_sec)+(end.tv_usec-start.tv_usec);
        printf("the time of the first %d files is %fus \n", i,t);
    }
    cout<<"the time of generating table:" <<t<<"us"<<endl;
    return 0;
}
这是现在的代码,又修改了A的大小和一些细节的错误。