69,371
社区成员
发帖
与我相关
我的任务
分享
#include <cstring>
#include <cstdio>
#include <string>
#include <map>
#include <set>
using namespace std;
// Non-owning view of a run of characters: a start pointer plus a length.
// Used as the key when counting how often each substring occurs.
struct Substring{
    const char* p;  // first character of the substring (not owned)
    int len;        // number of characters in the substring
    // Ordering: a shorter substring sorts before a longer one; for equal
    // lengths, the one whose bytes compare greater under memcmp sorts first.
    // Two views with the same length and the same bytes are equivalent,
    // regardless of where they point.
    friend bool operator < (const Substring& lhs, const Substring& rhs){
        return (lhs.len == rhs.len) ? (memcmp(lhs.p, rhs.p, lhs.len) > 0)
                                    : (lhs.len < rhs.len);
    }
};
// Non-owning substring view annotated with an occurrence count.
struct Substring2{
    const char* p;  // first character of the substring (not owned)
    int len;        // number of characters in the substring
    int times;      // how many times the substring has been seen
    // Ordering (the "best" candidate sorts first): higher count first, then
    // greater length, then the one whose bytes compare smaller under memcmp.
    friend bool operator < (const Substring2& lhs, const Substring2& rhs){
        if(lhs.times > rhs.times) return true;
        if(lhs.times < rhs.times) return false;
        if(lhs.len > rhs.len) return true;
        if(lhs.len < rhs.len) return false;
        return memcmp(lhs.p, rhs.p, lhs.len) < 0;
    }
};
// Finds the substring of `src` that occurs most often and stores it in `dest`.
//
// Every (start, length) window of `src` is counted, so overlapping
// occurrences are counted separately. Ties are resolved by
// Substring2::operator<: higher count first, then greater length, then the
// candidate whose bytes compare smaller under memcmp.
//
// Parameters:
//   dest - receives a copy of the winning substring; cleared when `src` is empty.
//   src  - the text to analyse.
// Returns: the occurrence count of the winning substring, or 0 when `src` is
//          empty (there is no substring to report).
int findRepeatMostLargestSubstr(string& dest, const string& src)
{
    const int totalLen = static_cast<int>(src.size());
    if(totalLen == 0){
        // With no characters there are no substrings; without this guard the
        // code below would dereference begin() of an empty container.
        dest.clear();
        return 0;
    }
    const char* const s = src.c_str();
    const char* const e = s + totalLen;

    // Count the occurrences of every substring. Keys compare by content, so
    // repeated occurrences at different positions share a single entry.
    map<Substring,int> substrMap;
    Substring key;
    for(key.len = 1; key.len <= totalLen; ++key.len){
        for(key.p = s; key.p + key.len <= e; ++key.p){
            ++substrMap[key];
        }
    }

    // Pick the best candidate in one pass over the final counts. This yields
    // the same winner as keeping a sorted set up to date on every increment,
    // without the repeated erase/insert.
    map<Substring,int>::const_iterator it = substrMap.begin();
    Substring2 best = {it->first.p, it->first.len, it->second};
    for(++it; it != substrMap.end(); ++it){
        const Substring2 candidate = {it->first.p, it->first.len, it->second};
        if(candidate < best) best = candidate;
    }
    dest = src.substr(best.p - s, best.len);
    return best.times;
}
// Test driver: runs the search on a sample string, prints the result, then
// waits for a key press before exiting.
int main()
{
    std::string sub;
    const int times = findRepeatMostLargestSubstr(sub, "1231231245");
    printf("sub = %s, times = %d\n", sub.c_str(), times);
    getchar();  // keep the console window open until a character is read
    return 0;
}
下面是复杂度分析
(1)空间复杂度:
上述算法中枚举了长度为1~N(字符串总长度)的子字符串,一共有
N*1 + (N-1)*2 + ... + (N-i)*i + ... + 1*N
= sum((N+1-i)*i, i = 1~N)
= sum((N+1)*i, i = 1~N) - sum(i*i, i = 1~N)
= (N+1)*N(N+1)/2 - N(N+1)(2N+1)/6 = N(N+1)(N+2)/6
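注意上式 N(N+1)(N+2)/6 是所有子字符串的字符总数;而Substring只保存指针和长度,大小固定,容器中的元素个数最多等于子字符串的个数 N + (N-1) + ... + 1 = N(N+1)/2。
如果每种子字符串都不同,即无任何子字符串重复出现,则需要的空间最多,此最差空间复杂度为
N(N+1)/2 * sizeof(Substring) * 2 = O(N^2),最后的乘2是因为同时有map和set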
(2)时间复杂度
观察如下代码
[code=c]
friend bool operator < (const Substring& a, const Substring& b){
if(a.len != b.len) return a.len < b.len;
else return memcmp(a.p, b.p, a.len) < 0;
}
friend bool operator < (const Substring2& a, const Substring2& b){
if(a.times != b.times) return a.times > b.times;
else if(a.len != b.len) return a.len > b.len;
else return memcmp(a.p, b.p, a.len) < 0;
}
[/code]
可以看出:
长度不等时,Substring的比较的复杂度为O(1),长度相等时一次比较的最差复杂度为O(a.len);
次数不同或者长度不等时,Substring2的比较的复杂度为O(1),次数相同且长度相等时一次比较的最差复杂度为O(a.len)。
即使认为每次比较的复杂度为最差的O(N),内层循环一共执行 N(N+1)/2 次,每次只做常数次map/set的查询、插入或删除,而每次这样的操作需要 O(log(N(N+1)/2)) = O(logN) 次比较,因此总时间复杂度为
O(N^2) * O(logN) * O(N) = O(N^3 logN),这里取了对数是因为map和set的二分查找性质