From 43d2ff1a040a7f941560fd3b7125c25c636a2d23 Mon Sep 17 00:00:00 2001 From: Brian Cox Date: Wed, 1 Jun 2016 22:08:25 -0700 Subject: [PATCH] Display multibyte characters properly, and accept them in interactive db update. --- src/core/charutil.cpp | 69 +++---------------------------- src/tw/textreportviewer.cpp | 42 ++++++++++++------- src/twtest/textreportviewer_t.cpp | 4 +- src/twtest/twutil_t.cpp | 4 +- 4 files changed, 36 insertions(+), 83 deletions(-) diff --git a/src/core/charutil.cpp b/src/core/charutil.cpp index e001c9d..742e371 100644 --- a/src/core/charutil.cpp +++ b/src/core/charutil.cpp @@ -41,28 +41,6 @@ #include "charutil.h" #include "ntmbs.h" -/* NOW WE USE tss::strinc -// like mblen but also for wchar_t -int util_tlen( const TCHAR* cur, size_t count ) -{ - int nch = -2; // 'unused' value - - ASSERT( count >= 0 ); - #ifdef _UNICODE - if( count > 0 ) - nch = 1; // next char is always one TCHAR long - else - nch = 0; // no more chars - #else - nch = ::mblen( cur, count ); // here sizeof(TCHAR) == 1 - #endif - - ASSERT( nch != -2 ); // make sure nch was set - return nch; -} -*/ - - // // finds the next whole character in string identified by ['cur'-'end') // identifies beginning of char in 'first', then end of character in 'last' @@ -105,51 +83,16 @@ bool cCharUtil::PeekNextChar( const TSTRING::const_iterator& cur, } first = cur; -// last = tss::strinc( cur ); - last = *cur ? cur + 1 : cur; + mblen (NULL, 0); + int len = mblen(&*cur, MB_CUR_MAX); + if (len < 0) //invalid multibyte sequence, but let's not blow up. + len = 1; + + last = cur + len; return true; } -/* OLD way of doing this... - NOW WE USE tss::strinc -int cCharUtil::PeekNextChar( const TSTRING::const_iterator& cur, - const TSTRING::const_iterator& end, - TSTRING::const_iterator& first, - TSTRING::const_iterator& last, - bool fThrowOnError ) -{ - // - // do we have a valid string here? - // - if( cur > end ) - return -1; - - // - // determine length of character in TCHARs - // - int charSizeInTCHARs = util_tlen( cur, (size_t)end - (size_t)cur ); - if( charSizeInTCHARs == -1 ) // TODO:BAM -- what if size is zero? does that make sense? - { - if( fThrowOnError ) - throw eCharUtilUnrecognizedChar(); - else - return -1; - } - - // - // denote beginning and end of character - // - first = cur; // first char always starts at 'cur' - last = first + charSizeInTCHARs; - - // - // there exist more characters - // - return charSizeInTCHARs; -} -*/ - //============================================================================= // // /* static */ diff --git a/src/tw/textreportviewer.cpp b/src/tw/textreportviewer.cpp index a43de25..05eac5f 100644 --- a/src/tw/textreportviewer.cpp +++ b/src/tw/textreportviewer.cpp @@ -844,7 +844,7 @@ void cTextReportViewer::GetGenreInfo( FCOList** ppCurList ) cGenre::Genre g = cGenreSwitcher::GetInstance()->StringToGenre( strGenre.c_str() ); if( cGenre::GENRE_INVALID == g) { - throw eTextReportViewerReportCorrupt(); // TODO: ERR_UKNOWN_GENRE + throw eTextReportViewerReportCorrupt("Invalid Genre"); // TODO: ERR_UKNOWN_GENRE } cGenreSwitcher::GetInstance()->SelectGenre( g ); @@ -853,7 +853,7 @@ void cTextReportViewer::GetGenreInfo( FCOList** ppCurList ) // GenreList::iterator curIter = mFCOsRemoveFromReport.find( g ); if( curIter == mFCOsRemoveFromReport.end() ) - throw eTextReportViewerReportCorrupt(); // TODO: ERR_UKNOWN_GENRE + throw eTextReportViewerReportCorrupt("No files found in report"); // TODO: ERR_UKNOWN_GENRE *ppCurList = curIter->second; // @@ -866,7 +866,7 @@ void cTextReportViewer::GetGenreInfo( FCOList** ppCurList ) void cTextReportViewer::GetBallotInfo( FCOList* pCurList ) { if( ! pCurList ) - throw eTextReportViewerReportCorrupt(); + throw eTextReportViewerReportCorrupt("No ballot list found"); // if the box is checked, then the user elected to leave the item // in the report, so we do nothing. If the box isn't checked, then @@ -885,7 +885,7 @@ void cTextReportViewer::GetBallotInfo( FCOList* pCurList ) iter = pCurList->find( fcoName ); if( iter == pCurList->end() ) { - throw eTextReportViewerReportCorrupt();// TODO: ERR_UKNOWN_NAME + throw eTextReportViewerReportCorrupt("Unknown file name in report");// TODO: ERR_UKNOWN_NAME } pCurList->erase( fcoName ); @@ -979,7 +979,7 @@ void cTextReportViewer::GetFCONameFromBallotLine( cFCOName& fcoName ) //throw (e else if( TW_IS_EOL( chIn ) ) { // if EOL, there was no name! - throw eTextReportViewerReportCorrupt(); + throw eTextReportViewerReportCorrupt("Ballot item without a name"); } else { @@ -1014,7 +1014,8 @@ void cTextReportViewer::GetFCONameFromBallotLine( cFCOName& fcoName ) //throw (e cStringUtil::StrToTstr( strFCOName ), fcoName ) ) { - throw eTextReportViewerReportCorrupt(); // TODO -- it might be nice to be able to specify what line of the report got corrupted + std::string msg = "Invalid object name: " + strFCOName; + throw eTextReportViewerReportCorrupt(msg); // TODO -- it might be nice to be able to specify what line of the report got corrupted } } @@ -1118,7 +1119,7 @@ void cTextReportViewer::RemoveFCOsFromReport() //throw (eTextReportViewer) if( nFCOsToRemove != nFCOsRemoved ) { // TODO -- maybe have a different enumeration for this? - throw eTextReportViewerReportCorrupt(); + throw eTextReportViewerReportCorrupt("Mismatch in objects to remove"); } } @@ -1942,20 +1943,21 @@ void cTextReportViewer::GetChar() // initialize mCurrentChar mCurrentCharSize = 0; - for( int i = 0; i < (int)sizeof( mCurrentChar ); i++ ) + for( uint32 i = 0; i < sizeof( mCurrentChar ); i++ ) mCurrentChar[i] = 0; static const std::istream::char_type eof = - std::char_traits< char >::to_char_type( - std::char_traits< char >::eof() ); + std::char_traits< char >::to_char_type(std::char_traits< char >::eof() ); + std::streampos pos = mpIn->tellg(); + for( size_t nch = 0; nch < (size_t)MB_CUR_MAX; nch++ ) { if( mpIn->eof() || PeekIsEOF() ) { // should be first byte we read if( nch != 0 ) - throw eTextReportViewerReportCorrupt(); + throw eTextReportViewerReportCorrupt("Expected EOF"); if( PeekIsEOF() ) { @@ -1975,13 +1977,11 @@ void cTextReportViewer::GetChar() if( ! mpIn->good() ) { d.TraceDebug( _T("Input stream error.\n") ); - throw eTextReportViewerReportCorrupt(); + throw eTextReportViewerReportCorrupt("Input stream error"); } // get character from input stream - std::istream::char_type ch = - std::char_traits::to_char_type( - mpIn->get() ); + std::istream::char_type ch = std::char_traits::to_char_type(mpIn->get()); // add character to mb buffer mCurrentChar[nch] = ch; @@ -1999,6 +1999,16 @@ void cTextReportViewer::GetChar() } } } + + mpIn->seekg( pos ); + std::istream::char_type c = std::char_traits::to_char_type( mpIn->get() ); + if( (unsigned char)c > 0x7f ) + { + mCurrentChar[0] = c; + mCurrentChar[1] = 0; + mCurrentCharSize = 1; + return; + } // sequence was not a valid mb character // (searched MB_CUR_MAX chars and didn't find a complete mb character) @@ -2008,7 +2018,7 @@ void cTextReportViewer::GetChar() d.TraceDebug( _T("%u\n"), (size_t)(unsigned char)mCurrentChar[j] ); #endif ASSERT( false ); - throw eTextReportViewerReportCorrupt(); + throw eTextReportViewerReportCorrupt("Invalid multibyte sequence"); } void cTextReportViewer::AppendChar( std::string& str ) diff --git a/src/twtest/textreportviewer_t.cpp b/src/twtest/textreportviewer_t.cpp index dfa13da..c6287e0 100644 --- a/src/twtest/textreportviewer_t.cpp +++ b/src/twtest/textreportviewer_t.cpp @@ -418,8 +418,8 @@ void MakeFile( TSTRING& strNameMakeMe ) std::string strA; for( TSTRING::iterator i = strNameMakeMe.begin(); i != strNameMakeMe.end(); i++ ) { - char ach[4]; - ASSERT( MB_CUR_MAX <= 4 ); + char ach[6]; + ASSERT( MB_CUR_MAX <= 6 ); int n = wctomb( ach, *i ); ASSERT( n != -1 ); diff --git a/src/twtest/twutil_t.cpp b/src/twtest/twutil_t.cpp index c96e4e4..8eb0cd2 100644 --- a/src/twtest/twutil_t.cpp +++ b/src/twtest/twutil_t.cpp @@ -120,8 +120,8 @@ std::string WideToNarrow( const TSTRING& strWide ) std::string strA; for( TSTRING::const_iterator i = strWide.begin(); i != strWide.end(); i++ ) { - char ach[4]; - ASSERT( MB_CUR_MAX <= 4 ); + char ach[6]; + ASSERT( MB_CUR_MAX <= 6 ); int n = wctomb( ach, *i ); ASSERT( n != -1 );