按照不区分大小写的方式来匹配子串，有无高效的算法？

Netguy 2001-06-22 09:54:00

欲在串s1中查找串s2出现的地方，这两个串在查找时都不区分大小写。

如果先把s2和s1全都转换成大写，然后再用strstr( )匹配，感觉这样效率不高。而且我的程序中的s1实际上是不允许修改的，因此如果要把s1转换成大写，先得把s1复制一份，这也会降低效率，还得分配额外的内存。

有无高效的算法？thx

...全文

60 2 打赏收藏转发到动态举报

写回复

用AI写文章

2 条回复

切换为时间正序

请发表友善的回复…

发表回复

Netguy 2001-06-22

打赏
举报

谢啦，帮我大忙了。先给分，回头再试。

孩皮妞野 2001-06-22

打赏
举报

Author: James Buchanan

头文件

/*

Super fast linear text search algorithms:
searchi = search ignore case
search = search case sensitive
searchiw = search ignore case words only (e.g. words delimited by whitespace only,
not words within words)
searchw() = search case sensitive words only

All functions return the number of matches for keyword in buffer, or -1 on error.

by James Buchanan
No license restrictions on this code.

Email: jamesb@northnet.com.au

*/

/*
 * Include guard. Identifiers containing a double underscore are reserved
 * for the C implementation, so the guard uses a plain name.
 */
#ifndef TEXTSEARCH_H
#define TEXTSEARCH_H

#include <stdio.h>
#include <stdlib.h>

/*
 * Every function below takes a NUL-terminated text buffer and a
 * NUL-terminated keyword, and returns the number of matches of keyword
 * in buffer, or -1 on error.
 */

/* Substring search, ignoring case. */
int searchi(const char *buffer, const char *keyword);

/* Substring search, case sensitive. */
int search(const char *buffer, const char *keyword);

/* Whole-word search (whitespace-delimited words), ignoring case. */
int searchiw(const char *buffer, const char *keyword);

/* Whole-word search (whitespace-delimited words), case sensitive. */
int searchw(const char *buffer, const char *keyword);

#endif /* TEXTSEARCH_H */

CPP文件

/*

Super fast linear text search algorithms:
searchi = search ignore case
search = search case sensitive
searchiw = search ignore case words only (e.g. words delimited by whitespace only,
not words within words)
searchw() = search case sensitive words only

All functions return the number of matches for keyword in buffer, or -1 on error.

by James Buchanan
No license restrictions on this code.

Email: jamesb@northnet.com.au

*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "textsearch.h"

/*
 * searchi - count non-overlapping occurrences of keyword in buffer,
 * ignoring case.
 *
 * buffer:  NUL-terminated text to scan (not modified).
 * keyword: NUL-terminated pattern to look for (not modified).
 *
 * Returns the number of matches, or -1 if either argument is NULL or an
 * empty string.
 *
 * The scan runs left to right; after a match it resumes at the character
 * immediately following the match, so matches never overlap
 * ("aaa" contains "aa" once).
 */
int searchi(const char *buffer, const char *keyword)
{
    size_t b_len, k_len, i, j;
    int found = 0;

    if (buffer == NULL || keyword == NULL)
        return -1;
    b_len = strlen(buffer);
    k_len = strlen(keyword);
    if (b_len == 0 || k_len == 0)
        return -1;

    i = 0;
    /* "<=" so that a match ending on the last character is still tested. */
    while (i + k_len <= b_len) {
        for (j = 0; j < k_len; j++) {
            /* <ctype.h> functions require an unsigned char value. */
            if (tolower((unsigned char)buffer[i + j]) !=
                tolower((unsigned char)keyword[j]))
                break; /* first mismatch rules this position out */
        }
        if (j == k_len) {
            found++;
            i += k_len; /* resume right after the match */
        } else {
            i++;
        }
    }
    return found;
}

/*
 * search - count non-overlapping occurrences of keyword in buffer,
 * case sensitive.
 *
 * buffer:  NUL-terminated text to scan (not modified).
 * keyword: NUL-terminated pattern to look for (not modified).
 *
 * Returns the number of matches, or -1 if either argument is NULL or an
 * empty string.
 *
 * After a match the scan resumes at the character immediately following
 * it, so matches never overlap ("aaa" contains "aa" once).
 */
int search(const char *buffer, const char *keyword)
{
    size_t k_len;
    const char *hit;
    int found = 0;

    if (buffer == NULL || keyword == NULL)
        return -1;
    if (*buffer == '\0' || *keyword == '\0')
        return -1;

    k_len = strlen(keyword);
    /*
     * hit + k_len never goes past the terminating NUL, because the match
     * itself occupies k_len characters of the buffer.
     */
    for (hit = strstr(buffer, keyword); hit != NULL;
         hit = strstr(hit + k_len, keyword))
        found++;

    return found;
}

/*
 * searchiw - count whole-word occurrences of keyword in buffer,
 * ignoring case.
 *
 * buffer:  NUL-terminated text to scan (not modified).
 * keyword: NUL-terminated word to look for (not modified). One leading
 *          and/or one trailing space is accepted and treated purely as a
 *          delimiter marker; it is not part of the word being matched.
 *
 * A match counts only when the word is delimited on both sides, where a
 * delimiter is a whitespace character (as classified by isspace) or the
 * start/end of the buffer. Words embedded in longer words do not count.
 *
 * Returns the number of matches, or -1 if either argument is NULL, if
 * buffer or keyword is shorter than two characters, or if keyword
 * consists only of delimiter spaces.
 *
 * No memory is allocated: the delimiters are checked in place instead of
 * building a space-padded copy of the keyword.
 */
int searchiw(const char *buffer, const char *keyword)
{
    size_t b_len, k_len, i, j;
    int found = 0;

    if (buffer == NULL || keyword == NULL)
        return -1;
    b_len = strlen(buffer);
    k_len = strlen(keyword);
    if (b_len < 2 || k_len < 2) /* Useless, for words only */
        return -1;

    /* Drop the optional delimiter space at each end of the keyword. */
    if (keyword[0] == ' ') {
        keyword++;
        k_len--;
    }
    if (k_len > 0 && keyword[k_len - 1] == ' ')
        k_len--;
    if (k_len == 0)
        return -1;

    i = 0;
    while (i + k_len <= b_len) {
        int matched = 0;

        /* Left delimiter: start of buffer or a whitespace character. */
        if (i == 0 || isspace((unsigned char)buffer[i - 1])) {
            for (j = 0; j < k_len; j++) {
                /* <ctype.h> functions require an unsigned char value. */
                if (tolower((unsigned char)buffer[i + j]) !=
                    tolower((unsigned char)keyword[j]))
                    break;
            }
            /* Right delimiter: end of buffer or a whitespace character. */
            if (j == k_len &&
                (i + k_len == b_len ||
                 isspace((unsigned char)buffer[i + k_len])))
                matched = 1;
        }

        if (matched) {
            found++;
            /*
             * Resume at the right delimiter so it can also serve as the
             * left delimiter of the next word.
             */
            i += k_len;
        } else {
            i++;
        }
    }
    return found;
}

/*
 * searchw - count whole-word occurrences of keyword in buffer,
 * case sensitive.
 *
 * buffer:  NUL-terminated text to scan (not modified).
 * keyword: NUL-terminated word to look for (not modified). One leading
 *          and/or one trailing space is accepted and treated purely as a
 *          delimiter marker; it is not part of the word being matched.
 *
 * A match counts only when the word is delimited on both sides, where a
 * delimiter is a whitespace character (as classified by isspace) or the
 * start/end of the buffer. Words embedded in longer words do not count.
 *
 * Returns the number of matches, or -1 if either argument is NULL, if
 * buffer or keyword is shorter than two characters, or if keyword
 * consists only of delimiter spaces.
 *
 * No memory is allocated: the delimiters are checked in place instead of
 * building a space-padded copy of the keyword.
 */
int searchw(const char *buffer, const char *keyword)
{
    const char *word;
    size_t b_len, w_len, pos;
    int found = 0;

    if (buffer == NULL || keyword == NULL)
        return -1;
    b_len = strlen(buffer);
    w_len = strlen(keyword);
    if (b_len < 2 || w_len < 2) /* Useless, for words only */
        return -1;

    /* Drop the optional delimiter space at each end of the keyword. */
    word = keyword;
    if (word[0] == ' ') {
        word++;
        w_len--;
    }
    if (w_len > 0 && word[w_len - 1] == ' ')
        w_len--;
    if (w_len == 0)
        return -1;

    pos = 0;
    while (pos + w_len <= b_len) {
        /* Delimiters: buffer edge or a whitespace character. */
        int left_ok = (pos == 0) ||
                      isspace((unsigned char)buffer[pos - 1]);
        int right_ok = (pos + w_len == b_len) ||
                       isspace((unsigned char)buffer[pos + w_len]);

        if (left_ok && right_ok &&
            memcmp(buffer + pos, word, w_len) == 0) {
            found++;
            /*
             * Resume at the right delimiter so it can also serve as the
             * left delimiter of the next word.
             */
            pos += w_len;
        } else {
            pos++;
        }
    }
    return found;
}

如果你觉得它不够快，告诉我一声。

Java 区分大小写 类名是以大写字母开头的名词。如果名字由多个单词组成，每个单词的第一个字母都应该大写（这种在一个单词中间使用大写字母的方式称为驼峰命名法。例如CamelCase)。源代码的文件名必须与公共类...

注意：这里需要使用一个pair记录前一个dp[i]的最长子串的始末位置，在判断s[i]与s[i-1]不等时，需要继续往前判断s[i]是否包含在前一个dp[i-1]的子串中，如果在，则当前的dp[i]需要减去dp[i-1]子串的前一部分。...

第一章正则表达式字符匹配攻略正则表达式是匹配模式，要么...如果正则只有精确匹配是没多大意义的，比如/hello/，也只能匹配字符串中的"hello"这个子串。 var regex = /hello/; console.log( regex.test("hello") );

Simhash算法学习及python实现1. Simhash算法是什么？2.Simhash算法思想3.Simhash算法流程3.1 分词3.2 hash3.3 加权3.4 合并3.5降维4. SimHash签名距离计算4.1 什么是海明距离呢？4.2 大规模数据下的海明距离计算参考...

首先利用和大小写字母A的差值，对应0-26，来将所有字母按顺序排好；可以接空瓶子，相当于每有两个空瓶就可以喝一瓶，结果直接除以2即可。使用动态规划，找到最长递增子序列+最长递减子序列；就是找I里是否包含R的...