// The developer of the original code and/or files is Tripwire, Inc. // Portions created by Tripwire, Inc. are copyright (C) 2000 Tripwire, // Inc. Tripwire is a registered trademark of Tripwire, Inc. All rights // reserved. // // This program is free software. The contents of this file are subject // to the terms of the GNU General Public License as published by the // Free Software Foundation; either version 2 of the License, or (at your // option) any later version. You may redistribute it and/or modify it // only in compliance with the GNU General Public License. // // This program is distributed in the hope that it will be useful. // However, this program is distributed AS-IS WITHOUT ANY // WARRANTY; INCLUDING THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS // FOR A PARTICULAR PURPOSE. Please see the GNU General Public License // for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, // USA. // // Nothing in the GNU General Public License or any other license to use // the code or files shall permit you to use Tripwire's trademarks, // service marks, or other intellectual property without Tripwire's // prior written consent. // // If you have any questions, please contact Tripwire, Inc. at either // info@tripwire.org or www.tripwire.org. // // // Name....: codeconvert.cpp // Date....: 9/2/99 // Creator.: Brian McFeely (bmcfeely) // // Convert a string between locale NTMBS encoding and UCS2 // /// Requirements #include "stdcore.h" // for: pch #include "core/codeconvert.h" // for: These classes #include "corestrings.h" // for: Error Strings #include "fsservices.h" // for: strerror #include "ntmbs.h" #include "errno.h" #ifdef DEBUG #define TSS_CCONV_VERBOSE // Uncomment for verbose tracing! #endif //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // DEFINES //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #define TSS_USE_ICONV_CCONV16 HAVE_ICONV_H #define TSS_USE_UCS2_CCONV16 (!(HAVE_ICONV_H) && WCHAR_REP_IS_UCS2 && WCHAR_IS_16_BITS) #define TSS_USE_UCS2_CCONV32 (!(HAVE_ICONV_H) && WCHAR_REP_IS_UCS2 && WCHAR_IS_32_BITS) #if ICONV_CONST_SOURCE # define ICONV_SOURCE_TYPE const char #else # define ICONV_SOURCE_TYPE char #endif //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // Static Data //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - iCodeConverter* iCodeConverter::m_pInst = NULL; //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // iCodeConverter //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // has a maximum of 512 chars of output std::string util_output_bytes( void* p, size_t n ) { std::ostringstream ss; ss.imbue( std::locale::classic() ); ss.setf( std::ios_base::hex, std::ios_base::basefield ); for( size_t i = 0; i < n; i++ ) { ss.width( 2 ); ss << toupper( tss::util::char_to_size( ((char*)p)[i] ) ) << " "; } std::string s = ss.str(); if( s.length() > 512 ) { s = "truncated output: " + s; s.resize( 512 ); } return s; } iCodeConverter* iCodeConverter::GetInstance() { if( ! m_pInst ) { m_pInst = CreateConverter(); } ASSERT( m_pInst ); return m_pInst; } iCodeConverter* iCodeConverter::CreateConverter() { cDebug d("iCodeConverter::CreateConverter()"); #if TSS_USE_ICONV_CCONV16 if( cIconvConverter::Test() ) { d.TraceDebug("using cIconvConverter\n"); return new cIconvConverter; } else if( cDoubleIconvConverter::Test() ) { d.TraceDebug("using cDoubleIconvConverter\n"); return new cDoubleIconvConverter; } else { d.TraceDebug("using CreateGoodEnoughConverter\n"); return CreateGoodEnoughConverter(); } #elif TSS_USE_UCS2_CCONV32 d.TraceDebug("using cWcharIs32BitUcs2Converterer\n"); return new cWcharIs32BitUcs2Converterer; #elif TSS_USE_UCS2_CCONV16 d.TraceDebug("using cWcharIs16BitUcs2Converterer\n"); return new cWcharIs16BitUcs2Converterer; #else // we don't have a proper converter, so fudge a little... d.TraceDebug("using CreateGoodEnoughConverter\n"); return CreateGoodEnoughConverter(); #endif ASSERT( ! "reachable" ); } iCodeConverter* iCodeConverter::CreateGoodEnoughConverter() { // // let someone know that we don't work on non-english OSs... // cDebug d( "iCodeConverter::CreateGoodEnoughConverter()" ); d.TraceAlways( "database WILL NOT be portable to other platforms.\n" ); return new cGoodEnoughConverterer; } //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // iconv Converters //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #if TSS_USE_ICONV_CCONV16 #include #include //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // Module Locale Helpers for iconv() //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #include "core/ntmbs.h" namespace /*Unique*/ { void tss_dispatch_iconvopen_error() { cDebug d("tss_dispatch_iconvopen_error"); switch( errno ) { case EMFILE: d.TraceDebug( "EMFILE: {OPEN_MAX} files descriptors are currently open in the calling process..\n" ); break; case ENFILE: d.TraceDebug( "ENFILE: Too many files are currently open in the system.\n" ); break; case ENOMEM: d.TraceDebug( "ENOMEM: Insufficient storage space is available.\n" ); break; case EINVAL: d.TraceDebug( "EINVAL: The conversion specified by fromcode and tocode is not supported by the implementation.\n" ); break; default: d.TraceDebug( "UNKNOWN: Unknown error.\n" ); break; } } int tss_dispatch_errno( cDebug& d ) { // Reasons for failure: // [EILSEQ] Input conv stopped due to an unknown input byte // [E2BIG] Input conversion stopped due to lack of space in the output // [EINVAL] Input conversion stopped due to an incomplete character // [EBADF] The cd argument is not a valid open conversion descriptor. // errno d.TraceDebug( _T( "iconv failed with: %s\n" ), iFSServices::GetInstance()->GetErrString().c_str() ); /// RAD: Always throw on error (Let user Catch if they want to go on) switch ( errno ) { case EILSEQ: case EINVAL: throw eConverterFatal ( TSS_GetString( cCore, core::STR_ERR_BADCHAR ) ); break; case E2BIG: throw eConverterFatal( TSS_GetString(cCore, core::STR_ERR_OVERFLOW ) ); break; case EBADF: throw eConverterUnknownCodepage( TSS_GetString( cCore, core::STR_UNKNOWN ) ); break; default: throw eConverterFatal( TSS_GetString( cCore, core::STR_UNKNOWN ) ); } return -1; } template< class BufferT, class SourceT > class ByteConvert { public: bool Convert( BufferT** ppBuf, size_t* pnBufferLeft, const SourceT** ppSrc, size_t* pnSourceLeft ) { cDebug d( "ByteConvert::Convert< BufferT, SourceT >()" ); //-- Copy manually into Buffer **ppBuf = cConvertUtil::ConvertNonChar( **ppSrc ); //-- Decrement counters *pnSourceLeft -= sizeof(SourceT); // Decrement Source Left *pnBufferLeft -= sizeof(BufferT); // Decrement Buffer Left //-- Reposition pointers (*ppSrc)++; // Skip one SourceT fwd (*ppBuf)++; // Skip one BufferT fwd return true; } }; class UTF8_Util { public: enum { INVALID_VALUE = 0xFF }; }; // convert to INVALID_VALUE and remember the byte value template< class BufferT, class SourceT > class ToUTF8Convert { public: ToUTF8Convert( std::list& lb ) : m_lb( lb ) {} bool Convert( mbchar_t** ppBuf, size_t* pnBufferLeft, const dbchar_t** ppSrc, size_t* pnSourceLeft ) { cDebug d( "ToUTF8Convert::Convert< mbchar_t, dbchar_t >()" ); // store the non char value m_lb.push_back( cConvertUtil::ConvertNonChar( **ppSrc ) ); // flag 'invalid char' **ppBuf = (mbchar_t)UTF8_Util::INVALID_VALUE; //-- Decrement counters *pnSourceLeft -= sizeof(dbchar_t); // Decrement Source Left *pnBufferLeft -= sizeof(mbchar_t); // Decrement Buffer Left //-- Reposition pointers (*ppSrc)++; // Skip one SourceT fwd (*ppBuf)++; // Skip one BufferT fwd return true; } bool Convert( mbchar_t** ppBuf, size_t* pnBufferLeft, const mbchar_t** ppSrc, size_t* pnSourceLeft ) { cDebug d( "ToUTF8Convert::Convert< char, char >()" ); // store non-char value m_lb.push_back( **ppSrc ); // flag 'invalid char' **ppBuf = (char)UTF8_Util::INVALID_VALUE; //-- Decrement counters *pnSourceLeft -= sizeof(mbchar_t); // Decrement Source Left *pnBufferLeft -= sizeof(mbchar_t); // Decrement Buffer Left //-- Reposition pointers (*ppSrc)++; // Skip one SourceT fwd (*ppBuf)++; // Skip one BufferT fwd return true; } private: // TODO:Get rid of this guy and just use a vector and // insert them when finished with second step std::list& m_lb; }; template< class BufferT, class SourceT > class FromUTF8Convert { public: FromUTF8Convert( std::list& lb ) : m_lb( lb ) {} bool Convert( dbchar_t** ppBuf, size_t* pnBufferLeft, const mbchar_t** ppSrc, size_t* pnSourceLeft ) { cDebug d( "FromUTF8Convert::Convert< dbchar_t, mbchar_t >()" ); //-- Must be our flag char size_t value = tss::util::char_to_size( **ppSrc ); if ( value != (size_t)UTF8_Util::INVALID_VALUE ) { d.TraceDebug( "value was not flag char: %X...\n", value ); return false; } //-- Get correct character from our byte array ASSERT( ! m_lb.empty() ); mbchar_t newVal = (mbchar_t)tss::util::char_to_size( m_lb.front() ); m_lb.pop_front(); //-- Copy character d.TraceDebug( "converted to: %X\n", tss::util::char_to_size( newVal ) ); **ppBuf = cConvertUtil::ConvertNonChar( newVal ); //-- Decrement counters *pnSourceLeft -= sizeof(mbchar_t); // Decrement Source Left *pnBufferLeft -= sizeof(dbchar_t); // Decrement Buffer Left //-- Reposition pointers (*ppSrc)++; // Skip one mbchar_t fwd (*ppBuf)++; // Skip one dbchar_t fwd return true; } // Converts a UTF-8 value to corresponding UCS2 char (in the private // use range) whose value is 0xE000 < x < 0xE0FF in UCS2. // Must be of the form 11101110 100000xx 10xxxxxx bool Convert( mbchar_t** ppBuf, size_t* pnBufferLeft, const mbchar_t** ppSrc, size_t* pnSourceLeft ) { cDebug d( "FromUTF8Convert::Convert< mbchar_t, mbchar_t >()" ); /* const size_t NUM_CHARS = 3; //-- Must be our cast byte value if( *pnBufferLeft < NUM_CHARS ) { d.TraceDebug( "not enough buffer space to be our cast byte: %d\n", *pnBufferLeft ); return false; } size_t first = tss::util::char_to_size( (*ppSrc)[0] ); size_t second = tss::util::char_to_size( (*ppSrc)[1] ); size_t third = tss::util::char_to_size( (*ppSrc)[2] ); d.TraceDebug( "have: first: 0x%08X, second: 0x%08X, third: 0x%08X\n", first, second, third ); //-- We expect the value to be UCS2_RESERVED_START + [0x00,0xFF] // ( 0xE000 - 0xE0FF ) which would be of the form // 11101110 100000xx 10xxxxxx in UTF-8 if( ( first != 0xEEu ) || ( ( second & 0xFCu ) != 0x80u ) || ( ( third & 0xC0u ) != 0x80u ) ) { d.TraceDebug( "Value was not UTF-8 for a char <= 0xE0FF and > 0xE000\n" ); return false; } //-- Get value of character size_t newVal = ( ( second & 0x03u ) << 6 ) | ( third & 0x3Fu ); if( ( newVal > ( UTF8_Util::UCS2_RESERVED_START + 0xFF ) ) || ( newVal < UTF8_Util::UCS2_RESERVED_START ) ) { d.TraceDebug( "value was not in our private use range: %X...\n", newVal ); return false; } //-- Remove offset newVal -= UTF8_Util::UCS2_RESERVED_START; ASSERT( newVal <= 0xFF ); //-- Copy character d.TraceDebug( "converted to: %X\n", newVal ); **ppBuf = (mbchar_t)newVal; //-- Decrement counters *pnSourceLeft -= NUM_CHARS * sizeof(mbchar_t); // Decrement Source Left *pnBufferLeft -= sizeof(mbchar_t); // Decrement Buffer Left //-- Reposition pointers (*ppSrc) += NUM_CHARS; // Skip two chars fwd (*ppBuf) += 1; // Skip one char fwd return true; */ //-- Must be our flag char size_t value = tss::util::char_to_size( **ppSrc ); if ( value != (size_t)UTF8_Util::INVALID_VALUE ) { d.TraceDebug( "value was not flag char: %X...\n", value ); return false; } //-- Get correct character from our byte array ASSERT( ! m_lb.empty() ); mbchar_t newVal = (mbchar_t)tss::util::char_to_size( m_lb.front() ); m_lb.pop_front(); //-- Copy character d.TraceDebug( "converted to: %X\n", tss::util::char_to_size( newVal ) ); **ppBuf = newVal; //-- Decrement counters *pnSourceLeft -= sizeof(mbchar_t); // Decrement Source Left *pnBufferLeft -= sizeof(mbchar_t); // Decrement Buffer Left //-- Reposition pointers (*ppSrc)++; // Skip one mbchar_t fwd (*ppBuf)++; // Skip one dbchar_t fwd return true; } private: // TODO:Get rid of this guy and just use a vector and // insert them when finished with second step std::list& m_lb; }; bool tss_ReverseConvert( iconv_t revConv, const char* pConvertedFrom, size_t nFrom, char* pConvertedTo, size_t nTo ) { cDebug d("tss_ReverseConvert< B, S, C >()"); d.TraceDebug( "Converted from: %s\n", util_output_bytes( (void*)pConvertedFrom, nFrom ).c_str() ); d.TraceDebug( "Converted to: %s\n", util_output_bytes( (void*)pConvertedTo, nTo ).c_str() ); char aBuffer[ MB_LEN_MAX ]; #ifdef _DEBUG for( size_t foo = 0; foo < sizeof( aBuffer ); foo++ ) aBuffer[ foo ] = 0xCD; #endif char* pBuf = &aBuffer[0]; size_t nBuf = sizeof( aBuffer ); const char* pSrc = pConvertedTo; size_t nSrc = nTo; size_t nConv = // NOTE: iconv( revConv, // On return, these addresses (ICONV_SOURCE_TYPE**)&pSrc, // are set for one past the last &nSrc, // "item" converted successfully &pBuf, &nBuf ); if( nConv == (size_t)-1 ) { d.TraceDebug( "reverse conversion failed: iconv error\n" ); return false; } d.TraceDebug( "sizeof( aBuffer ): %u, nBuf: %u, nFrom: %u\n", sizeof( aBuffer ), nBuf, nFrom ); d.TraceDebug( "reverse conversion got: %s\n", util_output_bytes( (void*)&aBuffer[0], sizeof( aBuffer ) - nBuf ).c_str() ); if( ( ( sizeof( aBuffer ) - nBuf ) != nFrom ) || // convertedFrom and check must be same size ( 0 != memcmp( pConvertedFrom, &aBuffer[0], nFrom ) ) ) // must be same value, too { d.TraceDebug( "reverse conversion failed: converted to a different value\n" ); return false; } return true; } template< class CharT > bool tss_IsFlaggedCharacter( CharT ch ) { return false; } template<> bool tss_IsFlaggedCharacter< dbchar_t >( dbchar_t wch ) { return cConvertUtil::ValueInReservedRange( wch ); } template< class BufferT, class SourceT > int tss_ConvertOneCharacter( iconv_t convForward, iconv_t convReverse, const char** ppSource, size_t* pnSourceLeft, char** ppBuffer, size_t* pnBufferLeft #if ( ! SUPPORTS_EXPLICIT_TEMPLATE_FUNC_INST ) , BufferT /*dummy*/, SourceT /*dummy*/ #endif ) { cDebug d("tss_ConvertOneCharacter< B, S, C >()"); const char* pSrc = *ppSource; char* pBuf = *ppBuffer; //-- HACK!!!! BAM -- see if it is one of our reserved UCS2 characters // TODO:BAM -- how does this affect our converters that don't use the UTF-8 step? if( tss_IsFlaggedCharacter( **( (SourceT**)ppSource ) ) ) { d.TraceDebug( "Found one of our unconvertable characters in the reserved range!\n" ); return -1; } //-- Try to find the number of items needed to get a complete character size_t nSrcTry; for( nSrcTry = sizeof( SourceT ); nSrcTry <= *pnBufferLeft && nSrcTry <= MB_LEN_MAX; nSrcTry += sizeof( SourceT ) ) { size_t nSrcLeft = nSrcTry; size_t nBufLeft = *pnBufferLeft;// Try to find a character in 'n' items // NOTE: On solaris sparc, iconv will attempt to NULL terminate the output, // so make sure that the buffer has space for a terminating NULL d.TraceDebug( "attempting to convert with %u items\n", nSrcTry ); size_t nConv = // NOTE: iconv( convForward, // On return, these addresses (ICONV_SOURCE_TYPE**)&pSrc, // are set for one past the last &nSrcLeft, // "item" converted successfully &pBuf, &nBufLeft ); if( nConv == (size_t)-1 ) { if( errno == EINVAL ) { d.TraceDebug( "more items needed\n" ); continue; } else { d.TraceDebug( "iconv failed with %d (not EINVAL)\n", errno ); return -1; } } else { if( tss_ReverseConvert( convReverse, *ppSource, pSrc - *ppSource, *ppBuffer, pBuf - *ppBuffer ) ) { // Modify source items to return ASSERT( nSrcLeft == 0 ); *ppSource = pSrc; *ppBuffer = pBuf; *pnSourceLeft -= nSrcTry; *pnBufferLeft = nBufLeft; return nConv; } else { d.TraceDebug( "reverse conversion failed\n" ); return -1; } } } //-- No valid character found in nBufferLeft or MB_LEN_MAX items d.TraceDebug( "no valid character found after %u items.\n", nSrcTry - 1 ); return -1; } // NOTE: is number of T's (as opposed to the number of Characters // or Bytes) So for a NTWCS sequence, Item is the number of Characters // while for a NTMBS sequence, Item is the number of char's (Bytes). // The low-down is that n????Items represents the "Count of T's" // NOTE: pBuffer should really have (nBufferItems+1) buffer items, because some // platforms use the last character to NULL terminate. template< class BufferT, class SourceT, class ConvT > int tss_Converter( iconv_t convForward, iconv_t convReverse, BufferT* pBuffer, size_t nBufferItems, const SourceT* pSource, size_t nSourceItems, ConvT& ConvertByte ) { cDebug d( "tss_Converter< BufferT, SourceT >()" ); #ifdef _DEBUG for( size_t s = nBufferItems; s; s-- ) pBuffer[s] = 0xCD; d.TraceDebug( "sizeof buffer: %d, sizeof source: %d\n", sizeof( BufferT ), sizeof( SourceT ) ); d.TraceDebug( "buffer size: %d, source size: %d\n", nBufferItems, nSourceItems ); d.TraceDebug( "source: %s\n", util_output_bytes( (void*)pSource, nSourceItems*sizeof( SourceT ) ).c_str() ); #endif #ifndef HACK_FOR_SOLARIS // See NOTE above nBufferItems++; #endif //--Get BYTES to process for in and out sequences size_t nBufferLeft = nBufferItems * sizeof(BufferT); size_t nSourceLeft = nSourceItems * sizeof(SourceT); //--Setup BYTE iterators to in and out sequences BufferT* pBuf = pBuffer; const SourceT* pSrc = pSource; while ( nSourceLeft > 0 ) { // Convert as much of the sequence as we can. SourceT* pIconvSrc = (SourceT*)pSrc; BufferT* pIconvDest = (BufferT*)pBuf; size_t nbIconvSrc = (size_t)nSourceLeft; size_t nbIconvDest = (size_t)nBufferLeft; size_t nConv = iconv( convForward, (ICONV_SOURCE_TYPE**)&pIconvSrc, &nbIconvSrc, (char**)&pIconvDest, &nbIconvDest ); if( nConv == -1 ) { // NOTE: On solaris sparc, iconv will attempt to NULL terminate the output, // so make sure that the buffer has space for a terminating NULL size_t nConv = // NOTE: #if SUPPORTS_EXPLICIT_TEMPLATE_FUNC_INST tss_ConvertOneCharacter( #else tss_ConvertOneCharacter( #endif convForward, convReverse, // On return, these addresses (const char**)&pSrc, // are set for one past the last &nSourceLeft, // "item" converted successfully (char**)&pBuf, &nBufferLeft #if ( ! SUPPORTS_EXPLICIT_TEMPLATE_FUNC_INST ) , BufferT(), SourceT() #endif ); } else { pSrc = pIconvSrc; pBuf = pIconvDest; nSourceLeft = nbIconvSrc; nBufferLeft = nbIconvDest; } if ( nConv == (size_t)-1 ) // Indidates Conversion Error! { if ( nBufferLeft <= 0 ) { d.TraceDebug( "No buffer space left\n" ); return 0; } // TODO:BAM -- handle other iconv errors! //--Must attempt Manual Conversion. ASSERT( (size_t)pSrc % sizeof( SourceT ) == 0 ); ASSERT( (size_t)pBuf % sizeof( BufferT ) == 0 ); d.TraceDebug( "conversion failed: %s\n", strerror( errno ) ); d.TraceDebug( "manually converting %X...\n", tss::util::char_to_size( *pSrc ) ); //-- Convert characters and reposition counters and pointers if( ! ConvertByte.Convert( &pBuf, &nBufferLeft, &pSrc, &nSourceLeft ) ) return 0; } } d.TraceDebug( "buffer out: %s\n", util_output_bytes( (void*)pBuffer, nBufferItems * sizeof(BufferT) - nBufferLeft ).c_str() ); return nBufferItems - (nBufferLeft / sizeof(BufferT)); } }//Unique //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // cIconvConverter //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cIconvConverter::cIconvConverter() { Init(); } cIconvConverter::~cIconvConverter() { // close conversion handles cIconvUtil::CloseHandle( icFromDb ); cIconvUtil::CloseHandle( icToDb ); } /* static */ bool cIconvConverter::Test() { bool testResult = true; cDebug d( "cCodeConverter::Test()" ); // // try to get the current code page id // const char* pCP = NULL; if( ! cIconvUtil::GetCodePageID( &pCP ) ) { d.TraceDebug( "No code page.\n" ); return false; } d.TraceDebug( "Got code page %s.\n", pCP ); // // find if converters are available // testResult &= cIconvUtil::TestConverter( pCP, cIconvUtil::GetIconvDbIdentifier() ); testResult &= cIconvUtil::TestConverter( cIconvUtil::GetIconvDbIdentifier(), pCP ); return testResult; } void cIconvConverter::Init() { // NOTE:BAM -- if the platform doesn't have a XXX to UCS2 conversion, // you can make one with the genxlt command.... icToDb = cIconvUtil::OpenHandle( cIconvUtil::GetIconvDbIdentifier(), cIconvUtil::GetCodePageID() ); icFromDb = cIconvUtil::OpenHandle( cIconvUtil::GetCodePageID(), cIconvUtil::GetIconvDbIdentifier() ); } int cIconvConverter::Convert( ntmbs_t pbz, size_t nBytes, const_ntdbs_t pwz, size_t nChars ) { cDebug d("cIconvConverter::Convert( ntdbs_t -> ntmbs_t )"); if ( pbz == 0 || ( pwz == 0 && nChars ) ) throw eConverterFatal( TSS_GetString( cCore, core::STR_ERR_ISNULL ) ); // // reset converter // cIconvUtil::ResetConverter( icFromDb ); // // do conversion // ByteConvert< mbchar_t, dbchar_t > bc; int nConverted = tss_Converter( icFromDb, icToDb, pbz, nBytes, pwz, nChars, bc ); if ( !nConverted ) return tss_dispatch_errno( d ); ASSERT( nConverted < nBytes ); return nConverted; } int cIconvConverter::Convert( ntdbs_t pwz, size_t nch, const_ntmbs_t pbz, size_t nBytes ) { cDebug d("cIconvConverter::Convert( ntmbs to ntdbs )"); if ( pwz == 0 || ( pbz == 0 && nBytes ) ) throw eConverterFatal( TSS_GetString( cCore, core::STR_ERR_ISNULL ) ); // // reset converter // cIconvUtil::ResetConverter( icToDb ); // // do conversion // ByteConvert< dbchar_t, mbchar_t > bc; int nConverted = tss_Converter( icToDb, icFromDb, pwz, nch, pbz, nBytes, bc ); if ( !nConverted ) return tss_dispatch_errno( d ); ASSERT( nConverted <= nch ); return nConverted; } /* //-- Converts one SourceT char to one BufferT char // checks to see that no OS-implementation-defined // conversion took place... template< class BufferT, class SourceT > int tss_Convert( iconv_t dbConv, BufferT* pBuffer, size_t nBufferItems, const SourceT* pSource, size_t nSourceItems ) { // // Perform first conversion // //-- Get BYTES to process for in and out sequences size_t nBufferLeft1 = nBufferItems * sizeof(BufferT); size_t nSourceLeft1 = nSourceItems * sizeof(SourceT); //-- Set up BYTE iterators to in and out sequences char* pBuf1 = (char*)( pBuffer + 0 ); char* pSrc1 = (char*)( pSource + 0 ); //-- NOTE: On solaris sparc, iconv will attempt to NULL terminate the output, // so make sure that the buffer has space for a terminating NULL size_t nConv1 = // NOTE: ::iconv( dbConv, // On return, these addresses (ICONV_SOURCE_TYPE*) &pSrc1, // are set for one past the last &nSourceLeft1, // "item" converted successfully &pBuf1, &nBufferLeft1 ); //-- Did conversion succeed? if( nConv1 != 1 ) { d.tracedebug(); return 0; } size_t nSrcBytesUsed1 = pSrc1 - (char*)pSource; size_t nBufBytesUsed1 = pBuf1 - (char*)pBuffer; // // Make sure no implementation-defined conversion occured // So see that the reverse conversion works out to the same source char // //-- Get BYTES to process for in and out sequences SourceT secondBuf[ MB_LEN_MAX ]; size_t nBufferLeft2 = MB_LEN_MAX * sizeof( SourceT ); size_t nSourceLeft2 = ((Buffer*)pBuf) - pBuffer; //-- Set up BYTE iterators to in and out sequences char* pBuf2 = (char*)&secondBuf[0]; char* pSrc2 = (char*)pBuffer; size_t nConv2 = ::iconv( mbConv, // On return, these addresses (ICONV_SOURCE_TYPE*) &pSrc2, // are set for one past the last &nSourceLeft2, // "item" converted successfully &pBuf2, &nBufferLeft2 ); //-- Did conversion succeed? if( nConv2 != 1 ) { d.tracedebug(); return 0; } size_t nSrcBytesUsed2 = pSrc2 - (char*)&secondBuf[0]; if( ( nSrcBytesUsed1 != nSrcBytesUsed2 ) || ( 0 != memcmp( &secondBuf[0], pBuffer, nSrcBytesUsed1 ) ) ) { d.tracedebug(); return 0; } return( nSrcBytesUsed1 and nBufBytesUsed1 ); } */ //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // cDoubleIconvConverter //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cDoubleIconvConverter::cDoubleIconvConverter() { Init(); } cDoubleIconvConverter::~cDoubleIconvConverter() { // close conversion handles cIconvUtil::CloseHandle( icMbToUTF8 ); cIconvUtil::CloseHandle( icUTF8ToDb ); cIconvUtil::CloseHandle( icDbToUTF8 ); cIconvUtil::CloseHandle( icUTF8ToMb ); } /* static */ bool cDoubleIconvConverter::Test() { bool testResult = true; cDebug d( "cDoubleIconvConverter::Test()" ); // // try to get the current code page id // const char* pCP = NULL; if( ! cIconvUtil::GetCodePageID( &pCP ) ) { d.TraceDebug( "No code page.\n" ); return false; } ASSERT( pCP && *pCP ); d.TraceDebug( "Got code page %s.\n", pCP ); // // find if converters are available // testResult &= cIconvUtil::TestConverter( cIconvUtil::GetMiddleIdentifier(), pCP ); testResult &= cIconvUtil::TestConverter( cIconvUtil::GetIconvDbIdentifier(), cIconvUtil::GetMiddleIdentifier() ); testResult &= cIconvUtil::TestConverter( cIconvUtil::GetMiddleIdentifier(), cIconvUtil::GetIconvDbIdentifier() ); testResult &= cIconvUtil::TestConverter( pCP, cIconvUtil::GetMiddleIdentifier() ); return testResult; } void cDoubleIconvConverter::Init() { icMbToUTF8 = cIconvUtil::OpenHandle( cIconvUtil::GetMiddleIdentifier(), cIconvUtil::GetCodePageID() ); icUTF8ToDb = cIconvUtil::OpenHandle( cIconvUtil::GetIconvDbIdentifier(), cIconvUtil::GetMiddleIdentifier() ); icDbToUTF8 = cIconvUtil::OpenHandle( cIconvUtil::GetMiddleIdentifier(), cIconvUtil::GetIconvDbIdentifier() ); icUTF8ToMb = cIconvUtil::OpenHandle( cIconvUtil::GetCodePageID(), cIconvUtil::GetMiddleIdentifier() ); } int cDoubleIconvConverter::Convert( ntmbs_t pbz, size_t nBytes, const_ntdbs_t pwz, size_t nChars ) { cDebug d("cDoubleIconvConverter::Convert( ntdbs_t -> ntmbs_t )"); // // initial checking // if ( pbz == 0 || ( pwz == 0 && nChars ) ) throw eConverterFatal( TSS_GetString( cCore, core::STR_ERR_ISNULL ) ); ////////////////////////////////////////////////// // do conversion to UTF8 ////////////////////////////////////////////////// // // reset first converter // cIconvUtil::ResetConverter( icDbToUTF8 ); // // create middle buffer // size_t nBufBytes = nChars * MB_LEN_MAX; ntmbs_t pszBuffer = (ntmbs_t)::operator new( nBufBytes + 1 ); std::auto_ptr pBuf( pszBuffer ); // // do first conversion // std::list lb; // buffer for invalid bytes ToUTF8Convert< mbchar_t, dbchar_t > tc( lb ); int nConverted = tss_Converter( icDbToUTF8, icUTF8ToDb, pszBuffer, nBufBytes, pwz, nChars, tc ); if ( !nConverted ) return tss_dispatch_errno( d ); ASSERT( nConverted <= nBufBytes ); ////////////////////////////////////////////////// // do conversion to MB char ////////////////////////////////////////////////// // // reset second converter // cIconvUtil::ResetConverter( icUTF8ToMb ); // // do second conversion // FromUTF8Convert< mbchar_t, mbchar_t > fc( lb ); nConverted = tss_Converter( icUTF8ToMb, icMbToUTF8, pbz, nBytes, pszBuffer, nConverted, fc ); if ( !nConverted ) return tss_dispatch_errno( d ); ASSERT( nConverted <= nBytes ); // // return number of bytes converted // return nConverted; } int cDoubleIconvConverter::Convert( ntdbs_t pwz, size_t nch, const_ntmbs_t pbz, size_t nBytes ) { cDebug d("cDoubleIconvConverter::Convert( ntmbs to ntdbs )"); if ( pwz == 0 || ( pbz == 0 && nBytes ) ) throw eConverterFatal( TSS_GetString( cCore, core::STR_ERR_ISNULL ) ); ////////////////////////////////////////////////// // do conversion to UTF-8 char ////////////////////////////////////////////////// // // reset first converter // cIconvUtil::ResetConverter( icMbToUTF8 ); // // create middle buffer // size_t nBufBytes = nBytes * MB_LEN_MAX; ntmbs_t pszBuffer = (ntmbs_t)::operator new( nBufBytes + 1 ); std::auto_ptr pBuf( pszBuffer ); // // do first conversion // std::list lb; // buffer for invalid bytes ToUTF8Convert< mbchar_t, mbchar_t > tc( lb ); int nConverted = tss_Converter( icMbToUTF8, icUTF8ToMb, pszBuffer, nBufBytes, pbz, nBytes, tc ); if ( !nConverted ) return tss_dispatch_errno( d ); ASSERT( nConverted <= nBufBytes ); ////////////////////////////////////////////////// // do conversion to DB char ////////////////////////////////////////////////// // // reset second converter // cIconvUtil::ResetConverter( icUTF8ToDb ); // // do second conversion // FromUTF8Convert< dbchar_t, mbchar_t > fc( lb ); nConverted = tss_Converter( icUTF8ToDb, icDbToUTF8, pwz, nch, pszBuffer, nConverted, fc ); if ( !nConverted ) return tss_dispatch_errno( d ); ASSERT( nConverted <= nch ); return nConverted; } //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // cIconvUtil //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - /* static */ iconv_t cIconvUtil::OpenHandle( const char* pTo, const char* pFrom ) { cDebug d("cIconvUtil::OpenHandle"); iconv_t ic; ic = iconv_open( pTo, pFrom ); if( ic == (iconv_t)-1 ) throw eConverterUnsupportedConversion(); d.TraceDebug( "opened %s to %s conversion\n", pFrom, pTo ); return ic; } /* static */ bool cIconvUtil::GetCodePageID( const char** ppCP ) { *ppCP = nl_langinfo( CODESET ); return( *ppCP && **ppCP ); } /* static */ const char* cIconvUtil::GetCodePageID() { const char* pCurCodePage; if( ! GetCodePageID( &pCurCodePage ) ) return NULL; return pCurCodePage; } /* static */ const char* cIconvUtil::GetIconvDbIdentifier() { #ifdef WORDS_BIGENDIAN return "UTF-16BE"; #else return "UTF-16LE"; #endif } /* static */ const char* cIconvUtil::GetMiddleIdentifier() { return "UTF-8"; } /* static */ void cIconvUtil::ResetConverter( iconv_t ic ) { char* p = 0; size_t s = 0; size_t i = iconv( ic, (ICONV_SOURCE_TYPE**) &p, &s, &p, &s ); if ( i == (size_t)-1 ) { ASSERT( false ); throw eConverterReset(); } } /* static */ void cIconvUtil::CloseHandle( iconv_t ic ) { int ret = iconv_close( ic ); ASSERT( ret != -1 ); } /* static */ bool cIconvUtil::TestConverter( const char* pTo, const char* pFrom ) { if (!pTo || !pFrom) return false; cDebug d( "cIconvUtil::TestConverter()" ); iconv_t i = iconv_open( pTo, pFrom ); if( (iconv_t)-1 == i ) { d.TraceDebug( "No %s to %s conversion.\n", pFrom, pTo ); tss_dispatch_iconvopen_error(); return false; } else { d.TraceDebug( "Valid conversion for %s to %s.\n", pFrom, pTo ); cIconvUtil::CloseHandle( i ); return true; } } #endif//TSS_USE_ICONV_CCONV16 //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // cWcharIs32BitUcs2Converterer //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - namespace /*Unique*/ { //-- Ensures 1-to-1 mb to wide character mapping by doing a reverse conversion // and comparing the results int tss_mbtowc( wchar_t* pwch, const mbchar_t* pch, size_t nBytes ) { // convert forward int nNarrow = ::mbtowc( pwch, pch, nBytes ); if( nNarrow == -1 ) return -1; // convert backwards mbchar_t ach[ MB_LEN_MAX ]; int nWide = ::wctomb( ach, *pwch ); if( nWide == -1 ) return -1; // compare... if( ( nNarrow > (int)nBytes ) || ( 0 != memcmp( ach, pch, nNarrow ) ) ) return -1; // success! return nNarrow; } //-- Ensures 1-to-1 mb to wide character mapping by doing a reverse conversion // and comparing the results int tss_wctomb( mbchar_t* pch, wchar_t wch ) { // convert forward int nWide = ::wctomb( pch, wch ); if( nWide == -1 ) return -1; // convert backwards wchar_t wchTest; int nNarrow = ::mbtowc( &wchTest, pch, MB_CUR_MAX ); if( nNarrow == -1 ) return -1; // compare... if( wchTest != wch ) return -1; // success! return nWide; } int tss_wcstombs( ntmbs_t pbz, const_ntwcs_t pwz, size_t nCount ) { cDebug d("tss_wcstombs"); size_t nConv; size_t N; for ( nConv = 0, N = 0; *pwz; ++pwz, pbz += N, ++nConv ) { N = tss_wctomb( pbz, *pwz ); if ( N == (size_t)-1 ) { *pbz = cConvertUtil::ConvertNonChar( (dbchar_t)*pwz ); N = 1; } } return (int)nConv; } int tss_mbstowcs( ntwcs_t pwz, const_ntmbs_t pbz, size_t nBytes ) { cDebug d("tss_mbstowcs"); size_t N; size_t nConv; const_ntmbs_t end = &pbz[nBytes]; for ( nConv = 0, N = 0; *pbz; pbz += N, ++pwz, ++nConv ) { N = tss_mbtowc( pwz, pbz, end - pbz ); if ( N == (size_t)-1 ) { d.TraceDebug( "manually converting %X...\n", cConvertUtil::ConvertNonChar( *pbz ) ); *pwz = (wchar_t)cConvertUtil::ConvertNonChar( *pbz ); N = 1; } } return (int)nConv; } }//Unique #if WCHAR_IS_32_BITS int cWcharIs32BitUcs2Converterer::Convert( ntmbs_t pbz, size_t nBytes, const_ntdbs_t pwz, size_t nCount ) { if ( pbz == 0 || ( pwz == 0 && nCount ) ) throw eConverterFatal( TSS_GetString( cCore, core::STR_ERR_ISNULL ) ); // cast our dbchar_t to wchar_t's first ntwcs_t pwzBuffer = new wchar_t[ nCount + 1 ]; for( size_t n = 0; n < nCount; n++ ) pwzBuffer[n] = pwz[n]; pwzBuffer[nCount] = 0x00; // null terminate int nConv = tss_wcstombs( pbz, pwzBuffer, nBytes ); if ( (size_t)nConv == (size_t)-1 ) { cDebug d("cWcharIs32BitUcs2Converterer::Convert( ntdbs to ntmbs )"); char* psz = new char[ nCount * 2 ]; char* at = psz; while ( *pwzBuffer ) *at++ = *pwzBuffer++; *at = 0x00; d.TraceDebug( "Invalid Input: [%s]\n", psz ); d.TraceDebug( "%s\n", util_output_bytes( (void*)pwz, nCount*sizeof(wchar_t)).c_str() ); throw eConverterFatal( TSS_GetString( cCore, core::STR_ERR_BADCHAR ) ); } return nConv; } int cWcharIs32BitUcs2Converterer::Convert( ntdbs_t pwz, size_t nCount, const_ntmbs_t pbz, size_t nBytes ) { cDebug d("cWcharIs32BitUcs2Converterer::Convert( ntmbs to ntdbs )"); // Validate Input if ( pwz == 0 || ( pbz == 0 && nBytes ) ) throw eConverterFatal( TSS_GetString( cCore, core::STR_ERR_ISNULL ) ); // mb to wc to a buffer of wide chars then.... wchar_t* pwzBuffer = new wchar_t[ nCount ]; int nConv = tss_mbstowcs( pwzBuffer, pbz, nCount ); if ( nConv == -1 ) { d.TraceDebug( "Invalid Input: [%s]\n", pbz ); throw eConverterFatal( TSS_GetString( cCore, core::STR_ERR_BADCHAR ) ); } // ...cast those chars to dbchar_ts for( size_t at = 0; at < (size_t)nConv; ++at, ++pwzBuffer ) { wchar_t wch = *pwzBuffer; if ( wch > 0xFFFF ) { d.TraceDebug("found wchar_T > 0xFFFF: %X\n", wch ); throw eConverterFatal( _T("Cannot truncate wchar_t to dbchar_t") ); } pwz[ at ] = (dbchar_t)wch; } pwz[nConv] = 0x00; return nConv; } #endif // WCHAR_IS_32_BITS //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // cWcharIs16BitUcs2Converterer //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #if WCHAR_IS_16_BITS int cWcharIs16BitUcs2Converterer::Convert( ntmbs_t pbz, size_t nbMB, const_ntdbs_t pwz, size_t nch ) { // Validate Input if ( pbz == 0 || ( pwz == 0 && nch ) ) throw eConverterFatal( TSS_GetString( cCore, core::STR_ERR_ISNULL ) ); int nConverted = tss_wcstombs( pbz, pwz, nbMB ); if ( nConverted == -1 ) { cDebug d("cWcharIs16BitUcs2Converterer[ntdbs->ntmbs]"); ntmbs_t psz = new char[ nch * 2 ]; ntmbs_t at = psz; while ( *pwz ) *at++ = (mbchar_t)*pwz++; *at = 0x00; d.TraceDebug( "Invalid Input: [%s]\n", psz ); throw eConverterFatal( TSS_GetString( cCore, core::STR_ERR_BADCHAR ) ); } return nConverted; } int cWcharIs16BitUcs2Converterer::Convert( ntdbs_t pwz, size_t nch, const_ntmbs_t pbz, size_t nBytes ) { // Validate Input if ( pbz == 0 || ( pwz == 0 && nch ) ) throw eConverterFatal( TSS_GetString( cCore, core::STR_ERR_ISNULL ) ); int nConverted = tss_mbstowcs( pwz, pbz, nch ); if ( nConverted == -1 ) { cDebug d("cWcharIs16BitUcs2Converterer::Convert( ntmbs to ntdbs )"); d.TraceDebug( "Invalid Input: [%s]\n", pbz ); throw eConverterFatal( TSS_GetString( cCore, core::STR_ERR_BADCHAR ) ); } return nConverted; } #endif // WCHAR_IS_16_BITS int cGoodEnoughConverterer::Convert( ntmbs_t pbz, size_t nBytes, const_ntdbs_t pwz, size_t nCount ) { if ( pbz == 0 || ( pwz == 0 && nCount ) ) throw eConverterFatal( TSS_GetString( cCore, core::STR_ERR_ISNULL ) ); char* at = pbz; const dbchar_t* dat = pwz; while ( *dat ) { if( *dat > 0xFF ) { *at = cConvertUtil::ConvertNonChar( *dat ); } else { *at = (char)*dat; } at++; dat++; } *at = 0x00; return( (size_t)at - (size_t)pbz ); } int cGoodEnoughConverterer::Convert( ntdbs_t pwz, size_t nCount, const_ntmbs_t pbz, size_t nBytes ) { // Validate Input if ( pwz == 0 || ( pbz == 0 && nBytes ) ) throw eConverterFatal( TSS_GetString( cCore, core::STR_ERR_ISNULL ) ); const char* at = pbz; dbchar_t* dat = pwz; while ( *at ) { if( (unsigned char)*at > (unsigned char)0x7Fu ) { *dat = cConvertUtil::ConvertNonChar( *at ); } else { *dat = (uint16)(unsigned char)*at; } dat++; at++; } *dat = 0x0000; return( ( (size_t)dat - (size_t)pwz ) / sizeof( dbchar_t ) ); } dbchar_t cConvertUtil::ConvertNonChar( mbchar_t ch ) { cDebug d("cConvertUtil::ConvertNonChar( mbchar_t to dbchar_t )"); if( ! ValueInReservedRange( ch ) ) { d.TraceDebug( "Invalid Input: [%X]\n", ch ); throw eConverterFatal( _T("mbchar_t is not high ascii!") ); } dbchar_t wch = (dbchar_t)( tss::util::char_to_size( ch ) + TSS_UCS2_RESERVED_START ); d.TraceDebug( "Converted 0x%08X to 0x%08X\n", tss::util::char_to_size( ch ), tss::util::char_to_size( wch ) ); return( wch ); } mbchar_t cConvertUtil::ConvertNonChar( dbchar_t wch ) { cDebug d("cConvertUtil::ConvertNonChar( dbchar_t to mbchar_t )"); if( ! ValueInReservedRange( wch ) ) { d.TraceDebug( "Invalid Input: [%X]\n", wch ); throw eConverterFatal( _T("Cannot truncate dbchar_t to mbchar_t") ); } mbchar_t ch = (mbchar_t)( wch - TSS_UCS2_RESERVED_START ); d.TraceDebug( "Converted 0x%08X to 0x%08X\n", tss::util::char_to_size( wch ), tss::util::char_to_size( ch ) ); return( ch ); } bool cConvertUtil::ValueInReservedRange( dbchar_t wch ) { size_t s = tss::util::char_to_size( wch ); return( ( s >= TSS_UCS2_RESERVED_START ) && ( s <= TSS_UCS2_RESERVED_END ) ); } bool cConvertUtil::ValueInReservedRange( mbchar_t ch ) { size_t s = tss::util::char_to_size( ch ); return( ( s >= TSS_HIGH_ASCII_START ) && ( s <= TSS_HIGH_ASCII_END ) ); }