虫虫:关于字符串trim函数的讨论,请诸位大虾不吝赐教
在CSDN论坛C/C++版上曾有一个问题,问如何将一个字符串两端的空格去掉。我曾在《程序员》2002年5月号的“专家门诊”部分作一小结,后来感觉有疏漏之处,非常抱歉。当时有朋友给出了下面的解答:
#include <string>
// Strips leading and trailing ASCII space characters (' ') from `s` in place.
// Only the single character ' ' is removed; tabs, newlines and other
// whitespace are left untouched.
void trim(std::string& s)
{
    const std::string::size_type head = s.find_first_not_of(' ');
    if (head == std::string::npos)
    {
        // Empty, or made up solely of spaces: nothing survives.
        s.erase();
        return;
    }

    // A non-space character exists, so the backward search cannot fail.
    const std::string::size_type tail = s.find_last_not_of(' ');
    s.erase(tail + 1);
    s.erase(0, head);
}
这个解答已经相当完备(我曾以为有效率问题,其实多虑了,成员函数find_first_not_of和find_last_not_of的实现是相当完美的)。不过在应用范围上还略有不足,比如不能处理双字节字符串,也不能删除首尾的其他指定字符(只能删除单字节空格)。在那次小结中,我曾尝试给出一种通用的办法,不过也存在上述问题。此外,其实现还涉及在处理方式上的权衡:是修改传入的字符串,还是保持传入字符串不变并返回一个处理后的字符串?
我尝试写了一个基于迭代器的ctrim函数,可以处理各种容器(包括原生数组),可以指定删除的字符,并兼顾了两种处理方式,供诸位参考使用。
ctrim代码
//---------------------------------------------------------------------------
#include <iterator>
#include <functional>
#include <algorithm>
#ifndef __GNUC__
#include <locale>
#endif
namespace trim
{
//---------------------------------------------------------------------------
// Implementation helpers; not part of the public interface.
namespace detail
{
// Unary functor returning the logical negation of a wrapped predicate.
// Used in place of std::not1 so that predicates need not expose an
// `argument_type` typedef (plain function pointers work too) and so the
// code does not rely on std::unary_function / std::not1, which later C++
// standards removed.
template <typename Predicate> class negated
{
private:
    Predicate pred_;
public:
    explicit negated(const Predicate& pred): pred_(pred) {}
    template <typename T> bool operator()(const T& v) const {return !pred_(v);}
};
// Deduces the predicate type and builds the matching negated<> wrapper.
template <typename Predicate>
inline negated<Predicate> negate_pred(const Predicate& pred)
{
    return negated<Predicate>(pred);
}
}
//---------------------------------------------------------------------------
// Default "trimmable character" predicate. Only the char and wchar_t
// specializations exist; any other element type needs an explicit predicate.
template <typename T> struct space;
template <> struct space<char>
{
    typedef char argument_type;
    typedef bool result_type;
    // True for the space character and everything below it (code <= 0x20).
    // The comparison is done on unsigned char: where plain char is signed,
    // bytes >= 0x80 (e.g. the bytes of multi-byte characters) are negative
    // and would otherwise be classified as blanks and trimmed away.
    bool operator()(char c) const {return static_cast<unsigned char>(c) <= ' ';}
};
template <> struct space<wchar_t>
{
    typedef wchar_t argument_type;
    typedef bool result_type;
    // True for L' ' and every wide character that compares below it.
    bool operator()(wchar_t c) const {return c <= L' ';}
};
//---------------------------------------------------------------------------
// Predicate that matches exactly one value: value<char>('_')(c) is c == '_'.
template <typename T> class value
{
private:
    T val_;
public:
    typedef T argument_type;
    typedef bool result_type;
    explicit value(const T& v): val_(v) {}
    bool operator()(const T& v) const {return v == val_;}
};
//---------------------------------------------------------------------------
#ifndef __GNUC__
// Predicate that classifies characters through the std::ctype<T> facet of a
// locale; by default it matches the locale's whitespace class.
// (Guarded exactly like the <locale> include this class depends on.)
template <typename T, std::ctype_base::mask type = std::ctype_base::space>
class locale_char
{
private:
    // A copy of the locale is stored so that the facet referenced below
    // stays alive even when the constructor argument was a temporary.
    std::locale loc_;
    const std::ctype<T>* ctype_;
public:
    typedef T argument_type;
    typedef bool result_type;
    bool operator() (const T& v) const {return ctype_->is(type, v);}
    explicit locale_char(const std::locale& loc = std::locale())
        : loc_(loc), ctype_(&std::use_facet<std::ctype<T> >(loc_)) {}
};
#endif
//---------------------------------------------------------------------------
// Copies [first, last) to `result`, omitting the leading and trailing
// elements for which `pred` returns true. The input range is not modified.
// Returns the output iterator positioned after the last element written.
template <typename BidirectionalIterator, typename OutputIterator, typename Predicate>
OutputIterator
ctrim( BidirectionalIterator first,
       BidirectionalIterator last,
       OutputIterator result,
       Predicate pred )
{
    typedef std::reverse_iterator<BidirectionalIterator> ReverseIterator;
    const detail::negated<Predicate> keep = detail::negate_pred(pred);
    // First element that must be kept (== last when everything is trimmed).
    BidirectionalIterator start = std::find_if(first, last, keep);
    // The backward search stops at `start`, so the leading run is never
    // scanned twice; base() yields the position just past the last kept
    // element.
    BidirectionalIterator stop =
        std::find_if(ReverseIterator(last), ReverseIterator(start), keep).base();
    return std::copy(start, stop, result);
}
// Same as above, using space<value_type> as the predicate (char and wchar_t
// ranges only).
template <typename BidirectionalIterator, typename OutputIterator>
OutputIterator
ctrim( BidirectionalIterator first,
       BidirectionalIterator last,
       OutputIterator result )
{
    typedef typename std::iterator_traits<BidirectionalIterator>::value_type value_type;
    return ctrim(first, last, result, space<value_type>());
}
// In-place form: erases from container `c` the leading and trailing elements
// for which `pred` returns true. `c` must offer begin/end, rbegin/rend and
// range erase (std::string, std::vector, std::list, std::deque, ...).
template <typename C, typename Predicate> void ctrim(C& c, Predicate pred)
{
    const detail::negated<Predicate> keep = detail::negate_pred(pred);
    // The tail goes first so that, for contiguous containers, the leading
    // erase has fewer elements to shift down.
    c.erase(std::find_if(c.rbegin(), c.rend(), keep).base(), c.end());
    c.erase(c.begin(), std::find_if(c.begin(), c.end(), keep));
}
// In-place form using space<C::value_type> as the predicate.
template <typename C> void ctrim(C& c)
{
    typedef typename C::value_type value_type;
    ctrim(c, space<value_type>());
}
}
using trim::ctrim;
应用代码:
#include <iostream>
#include <string>
#include <boost/compose.hpp>
#include <list>
int main()
{
std::string s = " _abc __";
std::ostream_iterator<char> out(std::cout);
std::cout << '$';
ctrim(s.begin(), s.end(), out);
// sends a copy of the string without leading or trailing spaces to cout
// ctrim(s) changes the string itself
std::cout << '$' << std::endl;
// output: $_abc __$
std::cout << '$';
ctrim(s.begin(), s.end(), out, trim::value<char>('_'));
// sends a copy of the string
// without leading or trailing underline('_') characters to cout
std::cout << '$' << std::endl;
// output: $ _abc $
#ifndef __GNUC__
std::wstring ws = L" _abc__ ";
// Chinese spaces
ctrim(ws, trim::locale_char<wchar_t>());
// erase the leading or trailing Chinese spaces(2 bytes) to cout
std::wcout << L"$" << ws << L"$" << std::endl;
// output: $_abc__$
// it's ok with Dinkum STL, failed with Stlport and RogueWave STL
#endif
ctrim(s,
::boost::compose_f_gx_hx(std::logical_or<bool>(),
trim::value<char>('_'),
trim::value<char>(' ')));
// erase both leading and trailing underline('_') and space(' ') characters;
// boost::compose_f_gx_hx is the same as compose2 in SGI STL
std::cout << '$' << s << '$' << std::endl;
// output: $abc$
int a[] = {0, 1, 2, 0, 0, 0};
std::list<int> a_list(a, a+sizeof(a)/sizeof(int));
ctrim(a_list.begin(), a_list.end(),
std::ostream_iterator<int>(std::cout, " "),
trim::value<int>(0));
std::cout << std::endl;
// output: 1 2
}
请诸位给出自己的看法,谢谢!