按照不区分大小写的方式来匹配子串,有无高效的算法?

Netguy 2001-06-22 09:54:00
欲在串s1中查找串s2出现的地方,这两个串在查找时都不区分大小写。

如果先把s2和s1全都转换成大写,然后再用strstr( )匹配,感觉这样效率不高。而且我的程序中的s1实际上是不允许修改的,因此如果要把s1转换成大写,先得把s1复制一份,这也会降低效率,还得分配额外的内存。

有无高效的算法?thx
...全文
58 2 打赏 收藏 转发到动态 举报
写回复
用AI写文章
2 条回复
切换为时间正序
请发表友善的回复…
发表回复
Netguy 2001-06-22
  • 打赏
  • 举报
回复
谢啦,帮我大忙了。先给分,回头再试。
孩皮妞野 2001-06-22
  • 打赏
  • 举报
回复
Author: James Buchanan

头文件

/*

Super fast linear text search algorithms:
searchi = search ignore case
search = search case sensitive
searchiw = search ignore case words only (e.g. words delimited by whitespace only,
not words within words)
searchw() = search case sensitive words only

All functions return the number of matches for keyword in buffer, or -1 on error.

by James Buchanan
No license restrictions on this code.

Email: jamesb@northnet.com.au

*/

/* Include guard. The name avoids a leading double underscore because such
   identifiers are reserved for the C implementation. */
#ifndef TEXTSEARCH_H
#define TEXTSEARCH_H

#include <stdio.h>
#include <stdlib.h>

/* Each function takes two NUL-terminated strings and returns the number of
   matches for keyword in buffer, or -1 on error. Neither string is modified. */

/* Substring search, ignoring letter case. */
int searchi(const char *buffer, const char *keyword);

/* Substring search, case sensitive. */
int search(const char *buffer, const char *keyword);

/* Whole-word search, ignoring letter case (words within words do not count). */
int searchiw(const char *buffer, const char *keyword);

/* Whole-word search, case sensitive (words within words do not count). */
int searchw(const char *buffer, const char *keyword);

#endif /* TEXTSEARCH_H */

CPP文件

/*

Super fast linear text search algorithms:
searchi = search ignore case
search = search case sensitive
searchiw = search ignore case words only (e.g. words delimited by whitespace only,
not words within words)
searchw() = search case sensitive words only

All functions return the number of matches for keyword in buffer, or -1 on error.

by James Buchanan
No license restrictions on this code.

Email: jamesb@northnet.com.au

*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "textsearch.h"

/*
 * searchi - count non-overlapping occurrences of keyword in buffer, ignoring
 * letter case.
 *
 * buffer  : NUL-terminated text to scan (never modified).
 * keyword : NUL-terminated text to look for (never modified).
 *
 * Returns the number of matches, or -1 if either string is empty.
 * A keyword longer than buffer simply yields 0.
 */
int searchi(const char *buffer, const char *keyword)
{
    size_t b_len, k_len, i, j;
    int found = 0;

    b_len = strlen(buffer);
    k_len = strlen(keyword);
    if (b_len == 0 || k_len == 0)
        return -1;

    /* "i + k_len <= b_len" keeps the last candidate window (the one that ends
       exactly at the end of buffer) inside the scan. */
    i = 0;
    while (i + k_len <= b_len) {
        for (j = 0; j < k_len; j++) {
            /* <ctype.h> functions need a value representable as unsigned char;
               a plain char may be negative. */
            if (tolower((unsigned char)buffer[i + j]) !=
                tolower((unsigned char)keyword[j]))
                break;
        }
        if (j == k_len) {
            found++;
            i += k_len;     /* resume right after the match, skipping nothing */
        } else {
            i++;
        }
    }
    return found;
}

/*
 * search - count non-overlapping occurrences of keyword in buffer, case
 * sensitive.
 *
 * buffer  : NUL-terminated text to scan (never modified).
 * keyword : NUL-terminated text to look for (never modified).
 *
 * Returns the number of matches, or -1 if either string is empty.
 * A keyword longer than buffer simply yields 0.
 */
int search(const char *buffer, const char *keyword)
{
    const char *hit;
    size_t k_len;
    int found = 0;

    k_len = strlen(keyword);
    if (buffer[0] == '\0' || k_len == 0)
        return -1;

    /* strstr() locates each occurrence, including one that ends exactly at the
       end of buffer; the scan resumes immediately after the previous match so
       that adjacent occurrences are all counted. */
    for (hit = strstr(buffer, keyword); hit != NULL;
         hit = strstr(hit + k_len, keyword))
        found++;

    return found;
}

/* Lower-cased character at position pos of buffer viewed as if it had one
   blank in front and one behind: positions 0 and b_len + 1 are those virtual
   blanks, so a word touching either end of buffer is still delimited. */
static int searchiw_buffer_at(const char *buffer, size_t b_len, size_t pos)
{
    if (pos == 0 || pos == b_len + 1)
        return ' ';
    return tolower((unsigned char)buffer[pos - 1]);
}

/* Lower-cased character at position pos of the search pattern, i.e. keyword
   with a blank added on each side that does not already have one.  lead and
   trail are 1 when a blank is added on that side, 0 otherwise; p_len is the
   total pattern length. */
static int searchiw_pattern_at(const char *keyword, size_t lead, size_t trail,
                               size_t p_len, size_t pos)
{
    if (lead && pos == 0)
        return ' ';
    if (trail && pos == p_len - 1)
        return ' ';
    return tolower((unsigned char)keyword[pos - lead]);
}

/*
 * searchiw - count whole-word occurrences of keyword in buffer, ignoring
 * letter case.
 *
 * A match is keyword surrounded by a space character (' ') on both sides; the
 * beginning and the end of buffer also count as delimiters.  If keyword
 * already starts and/or ends with a space, that space is used as the
 * delimiter instead of adding another one.  Two consecutive matches may share
 * the single space between them.
 *
 * buffer  : NUL-terminated text to scan (never modified).
 * keyword : NUL-terminated word or phrase to look for (never modified).
 *
 * Returns the number of matches, or -1 if buffer or keyword is shorter than
 * two characters.
 */
int searchiw(const char *buffer, const char *keyword)
{
    size_t b_len, k_len, lead, trail, p_len, v_len, i, j;
    int found = 0;

    b_len = strlen(buffer);
    k_len = strlen(keyword);
    if (b_len < 2 || k_len < 2)     /* Useless, for words only */
        return -1;

    lead  = (keyword[0] != ' ');
    trail = (keyword[k_len - 1] != ' ');
    p_len = k_len + lead + trail;   /* length of the blank-delimited pattern */
    v_len = b_len + 2;              /* buffer plus its two virtual blanks */

    i = 0;
    while (i + p_len <= v_len) {
        for (j = 0; j < p_len; j++) {
            if (searchiw_buffer_at(buffer, b_len, i + j) !=
                searchiw_pattern_at(keyword, lead, trail, p_len, j))
                break;
        }
        if (j == p_len) {
            found++;
            /* Resume on the trailing delimiter so it can also serve as the
               leading delimiter of the next word (p_len >= 2, so i advances). */
            i += p_len - 1;
        } else {
            i++;
        }
    }
    return found;
}

/* Character at position pos of buffer viewed as if it had one blank in front
   and one behind: positions 0 and b_len + 1 are those virtual blanks, so a
   word touching either end of buffer is still delimited. */
static char searchw_buffer_at(const char *buffer, size_t b_len, size_t pos)
{
    if (pos == 0 || pos == b_len + 1)
        return ' ';
    return buffer[pos - 1];
}

/* Character at position pos of the search pattern, i.e. keyword with a blank
   added on each side that does not already have one.  lead and trail are 1
   when a blank is added on that side, 0 otherwise; p_len is the total pattern
   length. */
static char searchw_pattern_at(const char *keyword, size_t lead, size_t trail,
                               size_t p_len, size_t pos)
{
    if (lead && pos == 0)
        return ' ';
    if (trail && pos == p_len - 1)
        return ' ';
    return keyword[pos - lead];
}

/*
 * searchw - count whole-word occurrences of keyword in buffer, case
 * sensitive.
 *
 * A match is keyword surrounded by a space character (' ') on both sides; the
 * beginning and the end of buffer also count as delimiters.  If keyword
 * already starts and/or ends with a space, that space is used as the
 * delimiter instead of adding another one.  Two consecutive matches may share
 * the single space between them.
 *
 * buffer  : NUL-terminated text to scan (never modified).
 * keyword : NUL-terminated word or phrase to look for (never modified).
 *
 * Returns the number of matches, or -1 if buffer or keyword is shorter than
 * two characters.
 */
int searchw(const char *buffer, const char *keyword)
{
    size_t b_len, k_len, lead, trail, p_len, v_len, i, j;
    int found = 0;

    b_len = strlen(buffer);
    k_len = strlen(keyword);
    if (b_len < 2 || k_len < 2)     /* Useless, for words only */
        return -1;

    lead  = (keyword[0] != ' ');
    trail = (keyword[k_len - 1] != ' ');
    p_len = k_len + lead + trail;   /* length of the blank-delimited pattern */
    v_len = b_len + 2;              /* buffer plus its two virtual blanks */

    i = 0;
    while (i + p_len <= v_len) {
        for (j = 0; j < p_len; j++) {
            if (searchw_buffer_at(buffer, b_len, i + j) !=
                searchw_pattern_at(keyword, lead, trail, p_len, j))
                break;
        }
        if (j == p_len) {
            found++;
            /* Resume on the trailing delimiter so it can also serve as the
               leading delimiter of the next word (p_len >= 2, so i advances). */
            i += p_len - 1;
        } else {
            i++;
        }
    }
    return found;
}

如果你觉得它不够快, 告诉我一声。

33,028

社区成员

发帖
与我相关
我的任务
社区描述
数据结构与算法相关内容讨论专区
社区管理员
  • 数据结构与算法社区
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧