C++处理大量数据,在Visual Studio中编译运行时闪退,如何解决?
我的程序是读入一个400M的文本文件,统计其中的中文词词频,string全局变量数组开到了string[2500000],这个都没有问题。问题是在编译运行的时候,250M左右的数据还可以处理,再多的话程序在运行时就直接闪退了,也没有任何提示。然而此时任务管理器中显示该程序占用的内存只有590多M,远小于我电脑的内存(我的电脑内存有8G)。请问这是什么原因?如何解决?
先把代码贴到下面。
#include <iostream>
#include<fstream>
#include <cstdlib>
#include <map>
#include <vector>
#include <string>
#include <algorithm>
#include<time.h>
// Capacity of the global line buffer below; main stops reading once
// MAXLINE - 1 lines have been stored.
#define MAXLINE 2500000
using namespace std;
// Text lines kept from the input file, one array entry per stored line.
string line[MAXLINE];
// Word -> number of occurrences.
map<string, int> tMap;
// (word, count) pairs copied out of tMap so they can be ordered by count.
vector<pair<string, int>> tVector;
// Holds the (iterator, inserted?) pair returned by a map insert.
pair<map<string, int>::iterator, bool> ret;
// Ordering predicate for (word, count) pairs.
// Returns nonzero when x's count is strictly greater than y's and zero
// otherwise, so using it as a sort comparator places larger counts first.
int cmp(const pair<string, int>& x, const pair<string, int>& y)
{
    const int& countX = x.second;
    const int& countY = y.second;
    return countY < countX;
}
void oneword(string line[], int n)
{
int i,j,k,l; string word,temp;
for(i=0; i<n; i++){
for (j = 0; line[i][j]; j++)
;
k = j;
for (j = 0; j<k; ) {
temp = line[i][j];
temp += line[i][j + 1];
if (('0'-temp[0])<=128) {
word = temp;
ret = tMap.insert(make_pair(word, 1));
if (!ret.second)
++ret.first->second;
}
j = j + 2;
}
}
}
// Appends every (key, value) entry of tMap to tVector and then sorts the
// whole of tVector by value, largest first.
//
// Entries with equal values are ordered by ascending key, so the result is
// fully determined by the input instead of depending on how the sort
// implementation happens to arrange ties.
//
// Parameters:
//   tMap    - source entries; not modified.
//   tVector - destination; existing elements are kept and take part in the
//             sort together with the appended ones.
void sortMapByValue(std::map<std::string, int>& tMap, std::vector<std::pair<std::string, int> >& tVector)
{
    // One allocation up front instead of repeated regrowth while appending.
    tVector.reserve(tVector.size() + tMap.size());
    tVector.insert(tVector.end(), tMap.begin(), tMap.end());

    std::sort(tVector.begin(), tVector.end(),
        [](const std::pair<std::string, int>& lhs, const std::pair<std::string, int>& rhs) {
            if (lhs.second != rhs.second)
                return lhs.second > rhs.second;
            return lhs.first < rhs.first;
        });
}
// Program entry point.
//
// Reads "中文文本.txt" line by line, keeps only the bytes >= 0x80 of each
// line, stores the non-empty results in the global `line` array, counts
// two-byte words with oneword(), orders them with sortMapByValue() and writes
// "word: count" lines to "1strings.txt". Progress and elapsed CPU time are
// printed to standard output.
//
// Returns 0 on success and 1 when the input or output file cannot be opened.
int main()
{
    const clock_t start = clock();

    std::ifstream infile("中文文本.txt");
    if (!infile)
    {
        std::cout << "打开文件失败" << std::endl;
        // Keep the console open so the message can be read, then stop instead
        // of running the rest of the program on an unusable stream.
        system("pause");
        return 1;
    }

    int k = 0;          // number of lines stored in `line` so far
    std::string raw;    // current input line, without the trailing '\n'
    std::string kept;   // bytes of `raw` that survive the filter

    // std::getline grows `raw` as needed. The earlier fixed 2000-byte stack
    // buffer was written without a bounds check and overflowed on long lines.
    // Driving the loop from the read itself also ends cleanly on EOF or on a
    // stream error, and never re-processes the last byte after a failed read.
    while (std::getline(infile, raw)) {
        kept.clear();
        for (char c : raw) {
            // Drop every single-byte (ASCII) character; the unsigned compare
            // works whether plain `char` is signed or not.
            if (static_cast<unsigned char>(c) >= 0x80)
                kept += c;
        }

        if (kept.empty())
            continue;   // nothing left on this line

        line[k++] = kept;

        // Leave the last array slot free for the empty terminator entry.
        if (k == MAXLINE - 1) { std::cout << "超出数组范围" << std::endl; break; }
    }

    // NOTE(review): every kept line stays in memory while the frequency map
    // is built. If large inputs still fail, running out of address space in
    // a 32-bit build is a plausible cause - confirm the build target.
    line[k].clear();    // empty entry marks the end of the stored lines
    std::cout << k << std::endl;   // number of lines taken from the input file

    oneword(line, k);
    sortMapByValue(tMap, tVector);

    std::ofstream out("1strings.txt");
    if (!out)
    {
        std::cout << "打开输出文件失败" << std::endl;
        system("pause");
        return 1;
    }
    // '\n' instead of endl: no flush per entry; the stream is flushed when
    // it is closed.
    for (size_t i = 0; i < tVector.size(); ++i)
        out << tVector[i].first << ": " << tVector[i].second << '\n';
    out.close();

    const clock_t ends = clock();
    std::cout << "Running Time : " << static_cast<double>(ends - start) / CLOCKS_PER_SEC << std::endl;
    std::cout << "程序结束" << std::endl;
    system("pause");
    return 0;
}