64,632
社区成员
发帖
与我相关
我的任务
分享
ifstream iVecSim("input.txt");
iVecSim.seekg(0, iVecSim.end);
long long file_size = iVecSim.tellg();//文件大小
iVecSim.seekg(0, iVecSim.beg);
char *buffer = new char[file_size];
iVecSim.read(buffer, file_size);
string input(buffer);
delete[]buffer;
istringstream ss_sim(input);//string流
string fVecSim;
while (ss_sim.good()) {//从string流中读入vector
ss_sim >> fVecSim;
vec_similarity.push_back(atof(fVecSim.c_str()));
}
$ yes 1.0 | head -50000000 >5KW
$ head -2 5KW
1.0
1.0
$ wc 5KW
50000000 50000000 200000000 5KW
$ cat a.cpp
# include <iostream>
# include <fstream>
# include <vector>
# include <algorithm>
# include <iterator>
using namespace std;
/// Benchmark A: read whitespace-separated doubles from the file "5KW" into a
/// vector that grows on demand via back_inserter.
///
/// Prints the element count before and after loading, then the first and the
/// last value. Returns 1 (after printing both counts) when nothing could be
/// read, instead of indexing into an empty vector.
int main()
{
    ifstream input("5KW");
    vector<double> v;
    cout << v.size() << endl;
    copy(istream_iterator<double>(input),
         istream_iterator<double>(),
         back_inserter(v));
    cout << v.size() << endl;
    // A missing or empty file leaves v empty; v[0] would then be undefined
    // behaviour, so report the problem and stop.
    if (v.empty()) {
        cerr << "5KW: no values read" << endl;
        return 1;
    }
    cout << v.front() << endl;
    cout << v.back() << endl;
    return 0;
}
$ time ./a
0
50000000
1
1
real 0m12.200s
user 0m11.316s
sys 0m0.152s
$ cat b.cpp
# include <iostream>
# include <fstream>
# include <vector>
# include <algorithm>
# include <iterator>
using namespace std;
/// Benchmark B: same input as benchmark A, but the vector is created with all
/// 50000000 slots up front and each slot is filled by formatted extraction.
///
/// Prints the element count before and after loading, then the first and the
/// last value.
int main()
{
    ifstream input("5KW");
    vector<double> v(50000000);
    cout << v.size() << endl;
    // Fill every pre-allocated slot in order, front to back.
    for (vector<double>::iterator slot = v.begin(); slot != v.end(); ++slot) {
        input >> *slot;
    }
    cout << v.size() << endl;
    cout << v.front() << endl;
    cout << v.back() << endl;
    return 0;
}
$ time ./b
50000000
50000000
1
1
real 0m10.103s
user 0m9.272s
sys 0m0.064s
一开始就把空间分配好,确实会快一些,但只快了一点点(real 12.2s → 10.1s);时间主要都消耗在 IO 上了。
$ cat c.cpp
# include <iostream>
# include <fstream>
# include <vector>
# include <algorithm>
# include <iterator>
using namespace std;
/// Benchmark C: read 50000000 doubles from the file "5KW" into a raw heap
/// array using formatted stream extraction.
///
/// Prints the first and the last value. Returns 1 if any value cannot be
/// read, so that uninitialised array elements are never printed.
int main()
{
    const size_t kCount = 50000000; // number of values expected in the file
    ifstream input("5KW");
    double * v = new double[kCount];
    for (size_t i = 0; i < kCount; i++) {
        // new double[] leaves the elements uninitialised; stop on the first
        // failed extraction rather than reading indeterminate values later.
        if (!(input >> v[i])) {
            cerr << "5KW: failed to read value " << i << endl;
            delete[] v;
            return 1;
        }
    }
    cout << v[0] << endl;
    cout << v[kCount - 1] << endl;
    delete[] v; // release the array that was previously leaked
    return 0;
}
$ time ./c
1
1
real 0m9.516s
user 0m9.064s
sys 0m0.076s
$ cat d.cpp
# include <iostream>
# include <fstream>
# include <vector>
# include <algorithm>
# include <iterator>
# include <cstdio>
using namespace std;
/// Benchmark D: read 50000000 doubles from the file "5KW" into a raw heap
/// array using C stdio (fscanf) instead of iostream.
///
/// Prints the first and the last value. Returns 1 if the file cannot be
/// opened or a value cannot be parsed.
int main()
{
    const size_t kCount = 50000000; // number of values expected in the file
    FILE * input = fopen("5KW", "r");
    if (input == NULL) {
        // fscanf on a null FILE* is undefined behaviour; bail out instead.
        perror("5KW");
        return 1;
    }
    double * v = new double[kCount];
    for (size_t i = 0; i < kCount; i++) {
        // fscanf returns the number of converted items; anything other than 1
        // means v[i] was not written.
        if (fscanf(input, "%lf", &v[i]) != 1) {
            cerr << "5KW: failed to read value " << i << endl;
            fclose(input);
            delete[] v;
            return 1;
        }
    }
    fclose(input);
    cout << v[0] << endl;
    cout << v[kCount - 1] << endl;
    delete[] v; // release the array that was previously leaked
    return 0;
}
$ time ./d
1
1
real 0m6.250s
user 0m5.844s
sys 0m0.056s
从上面的计时看,用数组比用 vector 略快(9.5s 对 10.1s),C 的 IO(fscanf)比 C++ 的 IO(ifstream >>)快得多(6.3s 对 9.5s)。
LZ应该是在读5亿个数据才对吧。
// Use a file named "A" as a disk-backed stand-in for "int A[10000000000]".
// _fseeki64 and the i64 literal suffix are MSVC-specific.
FILE *fA;
fA=fopen("A","rb+");
// Seek past the last element and write one byte so the file covers the array.
_fseeki64(fA,10000000000i64*sizeof(int),SEEK_SET);
fputc(0,fA);//int A[10000000000];  (fputc takes the character first, then the stream)
int B;
_fseeki64(fA,9999999999i64*sizeof(int),SEEK_SET);
fread(&B,1,sizeof(int),fA);//B=A[9999999999];
// Seek again before writing: a positioning call is required between a read
// and a write on an update-mode stream.
_fseeki64(fA,9999999999i64*sizeof(int),SEEK_SET);
fwrite(&B,1,sizeof(int),fA);//A[9999999999]=B;
fclose(fA);
// Quoted copy of the original poster's loader, with the reviewer's remarks.
// It reads all of "input.txt" into memory, then parses numbers from the copy.
ifstream iVecSim("input.txt");
iVecSim.seekg(0, iVecSim.end);
long long file_size = iVecSim.tellg();// file size
iVecSim.seekg(0, iVecSim.beg);
char *buffer = new char[file_size];
iVecSim.read(buffer, file_size); // the whole file is read once here
// NOTE(review): nothing above writes a terminating NUL into buffer, while this
// constructor reads up to the first NUL character.
string input(buffer); // the system may do copy-on-write, but semantically this is still one copy
delete[]buffer;
istringstream ss_sim(input);// string stream over the in-memory copy
string fVecSim;
// NOTE(review): good() is tested before the extraction, so a failed final
// extraction still reaches push_back.
while (ss_sim.good()) {// read from the string stream into the vector
ss_sim >> fVecSim;
vec_similarity.push_back(atof(fVecSim.c_str()));
}
本来你创建一个文件流对象,然后直接从中读取就可以了;被你“优化”之后,既要做文件 IO,又多了一次内存 copy。
已经提示了可以试试mmap。
另外到了这个量级,你的算法基本上得设计下了。