513 lines
13 KiB
C++
513 lines
13 KiB
C++
//
|
|
// The developer of the original code and/or files is Tripwire, Inc.
|
|
// Portions created by Tripwire, Inc. are copyright (C) 2000-2017 Tripwire,
|
|
// Inc. Tripwire is a registered trademark of Tripwire, Inc. All rights
|
|
// reserved.
|
|
//
|
|
// This program is free software. The contents of this file are subject
|
|
// to the terms of the GNU General Public License as published by the
|
|
// Free Software Foundation; either version 2 of the License, or (at your
|
|
// option) any later version. You may redistribute it and/or modify it
|
|
// only in compliance with the GNU General Public License.
|
|
//
|
|
// This program is distributed in the hope that it will be useful.
|
|
// However, this program is distributed AS-IS WITHOUT ANY
|
|
// WARRANTY; INCLUDING THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS
|
|
// FOR A PARTICULAR PURPOSE. Please see the GNU General Public License
|
|
// for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program; if not, write to the Free Software
|
|
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
|
// USA.
|
|
//
|
|
// Nothing in the GNU General Public License or any other license to use
|
|
// the code or files shall permit you to use Tripwire's trademarks,
|
|
// service marks, or other intellectual property without Tripwire's
|
|
// prior written consent.
|
|
//
|
|
// If you have any questions, please contact Tripwire, Inc. at either
|
|
// info@tripwire.org or www.tripwire.org.
|
|
//
|
|
/*
|
|
* Name....: ntmbs.h
|
|
* Date....: 08/31/1999
|
|
* Creator.: rdifalco
|
|
*
|
|
* Routines to handle NTMBS (null-terminated multi-byte character sequences)
|
|
* as well as mappings to work between NTMBS and like-sized character-type
|
|
* sequences.
|
|
*/
|
|
|
|
#ifndef __NTMBS_H
|
|
#define __NTMBS_H
|
|
|
|
|
|
/// Requirements.
|
|
|
|
#include <climits> // for: CHAR_BIT

#include "error.h" // for: TSS_EXCEPTION
#include "package.h" // for: TSS_GetString
#include "corestrings.h" // for: Utility Strings
|
|
|
|
|
|
/// Type Definitions for Multiple (?), Double (16), and Wide Characters (32)
|
|
|
|
//--Null-terminated Multibyte Character Sequence

#ifndef NTMBS_T_DEFINED
#define NTMBS_T_DEFINED

// mbchar_t is one element of a multibyte sequence. It is plain 'char'
// (natural sign) unless TSS_NTMBS_IS_UNSIGNED is defined, in which case
// it is 'unsigned char'.
#ifdef TSS_NTMBS_IS_UNSIGNED
typedef unsigned char mbchar_t;
#else //TSS_NTMBS_IS_UNSIGNED
typedef char mbchar_t;
#endif//TSS_NTMBS_IS_UNSIGNED

// Pointer forms are derived from mbchar_t so the sign choice is made in
// exactly one place (same layout as the dbchar_t / qbchar_t groups).
typedef mbchar_t* ntmbs_t;
typedef const mbchar_t* const_ntmbs_t;

#endif//NTMBS_T_DEFINED


//--Null-terminated Wide Character Sequence (Could be double or quad byte)

#ifndef NTWCS_T_DEFINED
#define NTWCS_T_DEFINED
//typedef wchar_t wchar_t;
typedef wchar_t* ntwcs_t;             // mutable wide sequence
typedef const wchar_t* const_ntwcs_t; // read-only wide sequence
#endif//NTWCS_T_DEFINED
|
|
|
|
|
|
|
|
/// NOTE: Size Specific (2 [double] or 4 [quad] byte wide characters)
|
|
|
|
//--Null-terminated double(2)-byte Character Sequence
|
|
|
|
#ifndef NTDBS_T_DEFINED
|
|
#define NTDBS_T_DEFINED
|
|
#if WCHAR_IS_16_BITS
|
|
typedef wchar_t dbchar_t; // Same size but use NT's type
|
|
#else
|
|
typedef uint16 dbchar_t;
|
|
#endif
|
|
typedef dbchar_t* ntdbs_t;
|
|
typedef const dbchar_t* const_ntdbs_t;
|
|
#endif//NTDBS_T_DEFINED
|
|
|
|
|
|
//--Internal, "size-specific" types for type dispatched specializations
|
|
|
|
#ifndef NTQBS_T_DEFINED
|
|
#define NTQBS_T_DEFINED
|
|
#if WCHAR_IS_32_BITS
|
|
typedef wchar_t qbchar_t; // Same size but use NT's type
|
|
#else
|
|
typedef uint32 qbchar_t;
|
|
#endif
|
|
typedef qbchar_t* ntqbs_t;
|
|
typedef const qbchar_t* const_ntqbs_t;
|
|
#endif//NTQBS_T_DEFINED
|
|
|
|
|
|
|
|
//- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
// NTMBS Manipulators
|
|
//- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
// Exception types for character handling, declared through the
// TSS_EXCEPTION macro from error.h.
// NOTE(review): the second macro argument is presumably the base class,
// giving eError <- eCharacter <- eCharacterEncoding; confirm in error.h.
TSS_EXCEPTION( eCharacter, eError );
TSS_EXCEPTION( eCharacterEncoding, eCharacter );
|
|
|
|
|
|
namespace tss
|
|
{
|
|
/// Specific Routines (Add as needed)
|
|
|
|
//ntmbs_t mbsdec( const_ntmbs_t, const_ntmbs_t );
|
|
ntmbs_t mbsinc( const_ntmbs_t );
|
|
|
|
size_t mbsbytes( const_ntmbs_t, size_t );
|
|
size_t mbscount( const_ntmbs_t, size_t );
|
|
|
|
size_t mbsbytes( const_ntmbs_t );
|
|
size_t mbscount( const_ntmbs_t );
|
|
|
|
inline size_t mbslen( const_ntmbs_t psz ) { // RAD: Yeesh!
|
|
return tss::mbscount( psz );
|
|
}
|
|
}//tss::
|
|
|
|
|
|
//- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
// Character Sequence Independent Mappings
|
|
//- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
// NOTE: In each, we first do the homogeneous charsize ver, then the hetero
|
|
|
|
namespace tss
|
|
{
|
|
/// Increment and Decrement Pointers by (N)
|
|
|
|
//--DEC
|
|
|
|
// Steps one element back from 'psz' and returns the result as a
// non-const pointer.
//
// The first argument (start of the sequence) is accepted so that the
// signature matches a multibyte-aware decrement, but this fixed-width
// version never looks at it: no bounds check is made, so the caller must
// guarantee that 'psz' is not already at the start of the sequence.
template< class CT >
inline
CT* strdec( const CT*, const CT* psz )
{
    return const_cast<CT*>( psz - 1 );
}
|
|
|
|
#if 0
// Disabled: NTMBS flavour of strdec(). It forwards to tss::mbsdec(), whose
// declaration is itself commented out in this header, so this cannot be
// enabled until that routine exists.
template<>
inline
ntmbs_t strdec( const_ntmbs_t start, const_ntmbs_t current )
{
    return tss::mbsdec( start, current );
}
#endif
|
|
|
|
|
|
//--INC
|
|
|
|
// Advances 'psz' by one element and returns the result as a non-const
// pointer. If 'psz' already points at the null terminator it is returned
// unchanged, so repeated calls never walk off the end of the sequence.
//
// 'psz' must not be null (checked with ASSERT).
template< class CT >
inline
CT* strinc( const CT* psz )
{
    ASSERT( psz );
    return const_cast<CT*>( ( *psz ? psz + 1 : psz ) );
}
|
|
|
|
// Advances 'psz' by up to 'N' elements and returns the result as a
// non-const pointer.
//
// The walk stops early at the null terminator, so the result is the same
// as applying the single-step strinc() N times. (The previous form,
// "psz + N", only looked at the first element and could therefore jump
// past the terminator when N exceeded the remaining length.)
//
// A zero or negative N leaves 'psz' where it is; use strdec() to move
// backwards. 'psz' must not be null (checked with ASSERT).
template< class CT, class SIZET >
inline
CT* strinc( const CT* psz, SIZET N )
{
    ASSERT( psz );

    const CT* at = psz;
    for ( SIZET nDone = 0; nDone < N && *at; ++nDone )
        ++at;

    return const_cast<CT*>( at );
}
|
|
|
|
|
|
template<>
|
|
inline
|
|
ntmbs_t strinc( const_ntmbs_t psz )
|
|
{
|
|
return tss::mbsinc( psz );
|
|
}
|
|
|
|
template<>
|
|
inline
|
|
ntmbs_t strinc( const_ntmbs_t psz, size_t N )
|
|
{
|
|
ntmbs_t at = const_cast<ntmbs_t>( psz );
|
|
while ( N-- )
|
|
at = tss::mbsinc( at );
|
|
return at;
|
|
}
|
|
|
|
|
|
/// Byte and Character Count Mappings
|
|
|
|
|
|
//--BYTES
|
|
|
|
// Returns the size in BYTES of the null-terminated sequence 'psz', not
// counting the terminator: the number of elements before the first null
// element, multiplied by sizeof(CT).
//
// 'psz' must point to a null-terminated sequence; it is not checked.
template< class CT >
size_t strsize( const CT* psz )
{
    const CT* at = psz;
    while ( *at )
        ++at;

    // element distance times element width; avoids casting away const
    return (size_t)( at - psz ) * sizeof( CT );
}
|
|
|
|
// Returns the size in BYTES of 'N' elements of type CT. For fixed-width
// character types this is simply N * sizeof(CT); the pointer argument is
// only used to deduce CT and is never dereferenced.
template< class CT >
size_t strsize( const CT*, size_t N )
{
    return N * sizeof( CT );
}
|
|
|
|
template<>
|
|
inline size_t strsize( const_ntmbs_t psz ) {
|
|
return mbsbytes( psz );
|
|
}
|
|
template<>
|
|
inline size_t strsize( const_ntmbs_t psz, size_t N ) {
|
|
return mbsbytes( psz, N );
|
|
}
|
|
|
|
|
|
//--COUNT
|
|
|
|
// Returns the number of ELEMENTS in the null-terminated sequence 'psz',
// not counting the terminator. For fixed-width character types this is
// also the number of characters.
//
// 'psz' must point to a null-terminated sequence; it is not checked.
template< class CT >
size_t strcount( const CT* psz )
{
    const CT* at = psz;
    while ( *at )
        ++at;

    return (size_t)( at - psz );
}
|
|
|
|
// Counted form of strcount(): for fixed-width character types 'N' is
// returned unchanged. The pointer argument is only used to deduce CT and
// is never dereferenced.
template< class CT >
size_t strcount( const CT*, size_t N )
{
    return N;
}
|
|
|
|
template<>
|
|
inline size_t strcount( const_ntmbs_t psz ) {
|
|
return mbscount( psz );
|
|
}
|
|
template<>
|
|
inline size_t strcount( const_ntmbs_t psz, size_t N ) {
|
|
return mbscount( psz, N );
|
|
}
|
|
|
|
|
|
/* These are needed! However, it would be better to just create
|
|
a NTMBS aware string class (basic_string *can't* be) and be
|
|
done with it */
|
|
|
|
#ifdef TSS_NTMBS_AWARE_SEACH_COMPLETE
|
|
|
|
/* CAUTION:RAD -- I changed the traversal logic from "!=" to
   "<" to accommodate a basic_string whose end() member does not
   correctly point after the last valid trail-byte in a string.
   Really, at != end should be correct, but I don't want to leave
   any room for error. At some point, these routines should be put
   under unit test and have all occurrences of "at < end" changed
   to the "more on-purpose" "at != end". */
|
|
|
|
/// Various MULTIBYTE aware string searching routines..
|
|
|
|
//--FIND_FIRST: ITER
|
|
|
|
template< class InputT, class E >
|
|
InputT
|
|
find_first( InputT beg, InputT end, const E& item )
|
|
{
|
|
for ( ; beg < end && *beg != item; beg = (InputT)tss::strinc( beg ) );
|
|
return beg;
|
|
}
|
|
|
|
|
|
//--FIND_LAST: ITER
|
|
|
|
template< class InputT, class E >
|
|
InputT
|
|
find_last( InputT beg, InputT end, const E& item )
|
|
{
|
|
InputT at = end;
|
|
for ( ; beg < end ; beg = (InputT)tss::strinc( beg ) );
|
|
if ( *beg == item )
|
|
at = beg;
|
|
|
|
return at;
|
|
}
|
|
|
|
|
|
//--FIND_FIRST: STRING
|
|
|
|
template< class StrT >
|
|
StrT::size_type
|
|
find_first( const StrT& sin, StrT::const_reference item )
|
|
{
|
|
StrT::const_iterator
|
|
beg = sin.begin();
|
|
end = sin.end();
|
|
|
|
StrT::size_type N;
|
|
for ( N = 0; beg < end; ++N, beg = (InputT)tss::strinc( beg ) )
|
|
if ( *beg == item )
|
|
return N;
|
|
|
|
return StrT::npos;
|
|
}
|
|
|
|
|
|
//--FIND_LAST: STRING
|
|
|
|
template< class StrT >
|
|
StrT::size_type
|
|
find_last( const StrT& sin, StrT::const_reference item )
|
|
{
|
|
StrT::size_type N = 0;
|
|
StrT::size_type nResult = StrT::npos;
|
|
StrT::const_iterator
|
|
beg = sin.begin();
|
|
end = sin.end();
|
|
|
|
for ( ; beg < end ; beg = (InputT)tss::strinc( beg ) );
|
|
if ( *beg == item )
|
|
nResult = N;
|
|
|
|
return N;
|
|
}
|
|
|
|
|
|
//--FIND_FIRST_OF: ITER
|
|
|
|
template< class InputT >
|
|
InputT
|
|
find_first_of(
|
|
InputT beg,
|
|
InputT end,
|
|
InputT setbeg,
|
|
InputT setend )
|
|
{
|
|
InputT at;
|
|
for ( ; beg < end; beg = (InputT)tss::strinc( beg ) );
|
|
for ( at = setbeg; setbeg < setend; at = (InputT)tss::strinc( at ) )
|
|
if ( *beg == *at )
|
|
return beg;
|
|
|
|
return end;
|
|
}
|
|
|
|
|
|
//--FIND_FIRST_OF: STRING (SAFE!!!)
|
|
|
|
template< class StrT >
|
|
StrT::size_type
|
|
find_first_of( const StrT& sin, StrT::const_iterator set )
|
|
{
|
|
// Point to beg of input
|
|
StrT::iterator beg = sin.begin();
|
|
|
|
// Start Search
|
|
StrT::size_type N = 0;
|
|
for ( ; beg < sin.end(); ++N, beg = tss::strinc( beg ) )
|
|
for ( StrT::const_iterator at = set; *at; at = tss::strinc( at ) )
|
|
if ( *beg == *at )
|
|
return N;
|
|
|
|
return StrT::npos;
|
|
}
|
|
|
|
|
|
//--FIND_FIRST_OF: STRING (NOT SAFE!!!)
|
|
|
|
template< class StrT >
|
|
StrT::size_type
|
|
find_first_of(
|
|
const StrT& sin,
|
|
StrT::const_iterator set,
|
|
StrT::size_type nPos,
|
|
StrT::size_type nCount )
|
|
{
|
|
ASSERT( nPos < tss::strcount( sin.begin() );
|
|
|
|
if ( nCount > 0 && nPos < sin.size() )
|
|
{
|
|
ASSERT( nCount > tss::strcount( set ) );
|
|
StrT::const_iterator endset; // Get end of set
|
|
while ( nCount-- ) endset++;
|
|
|
|
// Advance to nPos
|
|
StrT::const_iterator at = tss::strinc( sin.begin(), nPos );
|
|
StrT::const_iterator end = sin.end();
|
|
|
|
// Start Search
|
|
StrT::size_type N = 0;
|
|
for ( ; at < end; ++N, at = tss::strinc( at ) )
|
|
{
|
|
if ( tss::find( set, endset, *at ) != 0 )
|
|
return N;
|
|
}
|
|
}
|
|
|
|
return StrT::npos;
|
|
}
|
|
|
|
|
|
//--FIND_LAST_OF: ITER
|
|
|
|
template< class InputT1, class InputT2 >
|
|
InputT
|
|
find_last_of(
|
|
const InputT1 beg, const InputT1 end,
|
|
const InputT2 setbeg, const InputT2 setend )
|
|
{
|
|
const InputT1 ans = end;
|
|
for ( ; beg < end; beg = tss::strinc( beg ) );
|
|
for ( InputT2 at = setbeg; setbeg != setend; at = tss::strinc( at ) )
|
|
if ( *beg == *at )
|
|
ans = beg;
|
|
|
|
return ans;
|
|
}
|
|
|
|
|
|
//--FIND_LAST_OF: STRING (SAFE!!!)
|
|
|
|
template< class StrT >
|
|
StrT::size_type
|
|
find_last_of( const StrT& sin, StrT::const_iterator set )
|
|
{
|
|
StrT::size_type nResult = StrT::npos;
|
|
StrT::size_type N = 0;
|
|
for ( ; at < end; ++N, at = tss::strinc( at ) )
|
|
{
|
|
for ( StrT::const_iterator at = set; *at; at = tss::strinc( at ) )
|
|
if ( *beg == *at )
|
|
nResult = N;
|
|
}
|
|
|
|
return nResult;
|
|
}
|
|
|
|
|
|
template< class StrT >
|
|
StrT::size_type
|
|
find_last_of(
|
|
const StrT& sin,
|
|
StrT::const_iterator set,
|
|
StrT::size_type nStart,
|
|
StrT::size_type nCount )
|
|
{
|
|
if ( nCount > 0 && sin.size() )
|
|
{
|
|
for ( StrT::const_iterator at = sin.begin()
|
|
+ (nStart < _Len ? nStart : _Len - 1); ; --_U)
|
|
if (_Tr::find(set, nCount, *_U) != 0)
|
|
return (_U - _Ptr);
|
|
else if (_U == _Ptr)
|
|
break;
|
|
}
|
|
|
|
return StrT::npos;
|
|
}
|
|
|
|
// TODO:RAD -- find_first_not_of;
|
|
// TODO:RAD -- find_last_not_of;
|
|
|
|
#endif//TSS_NTMBS_AWARE_SEACH_COMPLETE
|
|
|
|
namespace util
{
    // Preserves bit values when enlarging a character type to a size_t.
    // Good for preventing sign extension: a negative value of a signed
    // type narrower than size_t is returned as the unsigned number with
    // the same bit pattern (e.g. (signed char)-1 gives 0xFF).
    //
    // The result of the plain conversion is masked down to the width of
    // 'E', so this works for signed and unsigned types alike. The old
    // form instead ASSERTed that 'E' was unsigned, using a test that also
    // rejected unsigned types as wide as int.
    //
    // 'E' must not be wider than size_t (checked with ASSERT).
    template< class E >
    inline
    size_t
    char_to_size( E ch )
    {
        // if this fails, 'ch' can't be cast to a
        // size_t and preserve bit values
        ASSERT( sizeof( size_t ) >= sizeof( E ) );

        // number of high bits of the size_t that lie outside 'E'
        const size_t nPadBits = sizeof( E ) < sizeof( size_t )
                                    ? ( sizeof( size_t ) - sizeof( E ) ) * CHAR_BIT
                                    : 0;

        // keep only the low sizeof(E) bytes; this drops any sign extension
        const size_t mask = ~(size_t)0 >> nPadBits;

        return (size_t)ch & mask;
    }

    // Overload for plain 'char': goes through 'unsigned char' so that a
    // negative char maps to 128..255 rather than being sign-extended.
    inline
    size_t
    char_to_size( char ch )
    {
        return (size_t)(unsigned char)ch;
    }
}
|
|
}//tss::
|
|
|
|
|
|
#endif//__NTMBS_H
|
|
|