// // The developer of the original code and/or files is Tripwire, Inc. // Portions created by Tripwire, Inc. are copyright (C) 2000-2018 Tripwire, // Inc. Tripwire is a registered trademark of Tripwire, Inc. All rights // reserved. // // This program is free software. The contents of this file are subject // to the terms of the GNU General Public License as published by the // Free Software Foundation; either version 2 of the License, or (at your // option) any later version. You may redistribute it and/or modify it // only in compliance with the GNU General Public License. // // This program is distributed in the hope that it will be useful. // However, this program is distributed AS-IS WITHOUT ANY // WARRANTY; INCLUDING THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS // FOR A PARTICULAR PURPOSE. Please see the GNU General Public License // for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, // USA. // // Nothing in the GNU General Public License or any other license to use // the code or files shall permit you to use Tripwire's trademarks, // service marks, or other intellectual property without Tripwire's // prior written consent. // // If you have any questions, please contact Tripwire, Inc. at either // info@tripwire.org or www.tripwire.org. // /////////////////////////////////////////////////////////////////////////////// // displayencoder.cpp // //========================================================================= // INCLUDES //========================================================================= #include "stdcore.h" #include "displayencoder.h" #include "charutil.h" #include "debug.h" #include "twlocale.h" #include "stringutil.h" #include "errorutil.h" #include "ntmbs.h" #include "codeconvert.h" //========================================================================= // STANDARD LIBRARY INCLUDES //========================================================================= #include #include //========================================================================= // DEFINES AND MACROS //========================================================================= // uncomment this to test schema // #define TSS_DO_SCHEMA_VALIDATION ////////////////////////////////////////////////////////////////////////////// // ENCODER UTILITIES ////////////////////////////////////////////////////////////////////////////// inline bool IsSingleTCHAR(TSTRING::const_iterator first, TSTRING::const_iterator last) { return (first + 1 == last); } ////////////////////////////////////////////////////////////////////////////// // CHAR ENCODER INTERFACE ////////////////////////////////////////////////////////////////////////////// // all derived classes should encode a char to "EscapeChar() + Identifier() + Encode( char ) [ + Identifier() ]" class iCharEncoder { public: virtual ~iCharEncoder(){}; virtual bool NeedsEncoding(TSTRING::const_iterator first, TSTRING::const_iterator last) const = 0; // Determines if character identified by [first,last) needs encoding. // Returns true if it does. virtual TSTRING EncodeRoundtrip(TSTRING::const_iterator first, TSTRING::const_iterator last) const = 0; // Encodes character identified by [first,last) in such a way that it // can be decoded by Decode(). Returns encoded character sequence. virtual TSTRING EncodePretty(TSTRING::const_iterator first, TSTRING::const_iterator last) const = 0; // Encodes character identified by [first,last) in a manner that is not roundtrip, // but looks good. Returns encoded character sequence. virtual TSTRING Decode(TSTRING::const_iterator* pcur, const TSTRING::const_iterator end) const = 0; // Decodes character sequence beginning with '*pcur' and ending before 'end'. // Returns decoded character or sequence of characters. Advances *pcur beyond // the last character decoded. virtual TCHAR Identifier() const = 0; static TCHAR EscapeChar() { return char_escape; } protected: static TCHAR char_escape; }; class cNonNarrowableCharEncoder : public iCharEncoder { public: virtual ~cNonNarrowableCharEncoder() { } virtual bool NeedsEncoding(TSTRING::const_iterator first, TSTRING::const_iterator last) const; virtual TSTRING EncodeRoundtrip(TSTRING::const_iterator first, TSTRING::const_iterator last) const; virtual TSTRING EncodePretty(TSTRING::const_iterator first, TSTRING::const_iterator last) const; virtual TSTRING Decode(TSTRING::const_iterator* cur, const TSTRING::const_iterator end) const; virtual TCHAR Identifier() const; private: static TCHAR char_identifier; static TCHAR char_replace; }; class cNonPrintableCharEncoder : public iCharEncoder { public: cNonPrintableCharEncoder(bool f_allowWS) : m_allowWS(f_allowWS){}; virtual ~cNonPrintableCharEncoder() { } virtual bool NeedsEncoding(TSTRING::const_iterator first, TSTRING::const_iterator last) const; virtual TSTRING EncodeRoundtrip(TSTRING::const_iterator first, TSTRING::const_iterator last) const; virtual TSTRING EncodePretty(TSTRING::const_iterator first, TSTRING::const_iterator last) const; virtual TSTRING Decode(TSTRING::const_iterator* cur, const TSTRING::const_iterator end) const; virtual TCHAR Identifier() const; private: static TCHAR char_identifier; static TCHAR char_replace; bool m_allowWS; }; class cQuoteCharEncoder : public iCharEncoder { public: virtual ~cQuoteCharEncoder() { } virtual bool NeedsEncoding(TSTRING::const_iterator first, TSTRING::const_iterator last) const; virtual TSTRING EncodeRoundtrip(TSTRING::const_iterator first, TSTRING::const_iterator last) const; virtual TSTRING EncodePretty(TSTRING::const_iterator first, TSTRING::const_iterator last) const; virtual TSTRING Decode(TSTRING::const_iterator* cur, const TSTRING::const_iterator end) const; virtual TCHAR Identifier() const; private: static TCHAR char_test; static TCHAR char_identifier; static TCHAR char_replace; }; class cBackslashCharEncoder : public iCharEncoder { public: virtual ~cBackslashCharEncoder() { } virtual bool NeedsEncoding(TSTRING::const_iterator first, TSTRING::const_iterator last) const; virtual TSTRING EncodeRoundtrip(TSTRING::const_iterator first, TSTRING::const_iterator last) const; virtual TSTRING EncodePretty(TSTRING::const_iterator first, TSTRING::const_iterator last) const; virtual TSTRING Decode(TSTRING::const_iterator* cur, const TSTRING::const_iterator end) const; virtual TCHAR Identifier() const; private: static TCHAR char_test; static TCHAR char_identifier; static TCHAR char_replace; }; ////////////////////////////////////////////////////////////////////////////// // CHARACTER SPECIALIZATIONS ////////////////////////////////////////////////////////////////////////////// TCHAR iCharEncoder::char_escape = _T('\\'); TCHAR cNonNarrowableCharEncoder::char_identifier = _T('x'); TCHAR cNonPrintableCharEncoder::char_identifier = _T('x'); TCHAR cQuoteCharEncoder::char_identifier = _T('\"'); TCHAR cBackslashCharEncoder::char_identifier = _T('\\'); TCHAR cBackslashCharEncoder::char_test = cBackslashCharEncoder::char_identifier; TCHAR cQuoteCharEncoder::char_test = cQuoteCharEncoder::char_identifier; TCHAR cBackslashCharEncoder::char_replace = cBackslashCharEncoder::char_identifier; TCHAR cQuoteCharEncoder::char_replace = cQuoteCharEncoder::char_identifier; TCHAR cNonNarrowableCharEncoder::char_replace = _T('?'); TCHAR cNonPrintableCharEncoder::char_replace = _T('?'); ////////////////////////////////////////////////////////////////////////////// // TESTS ////////////////////////////////////////////////////////////////////////////// bool cNonNarrowableCharEncoder::NeedsEncoding(TSTRING::const_iterator first, TSTRING::const_iterator last) const { return false; // all chars are narrow } bool cNonPrintableCharEncoder::NeedsEncoding(TSTRING::const_iterator first, TSTRING::const_iterator last) const { // TODO:BAM -- handle this with mb chars // std::isprint does a wctob() on the wchar!!?!?! // what's up with that? Maybe ignore this all together and // just do a isprint like KAI does? // HYPOTHESIS: all mb characters are printable. only sb ASCII // chars that would have C isprint() return false actually aren't printable // So escape chars, and tabs and such are only in sb chars that C isprint() would check. // HMMMM: true in all locales, though? (LC_CTYPE is checked for C isprint(), though...) // Sooooo... it should be something like // // #ifdef _UNICODE // char nch = wctob( ch ); // return( nch != EOF && ! isprint( nch ) ); // #else // return( ! isprint( ch ) ); // #endif // // assuming all unprintable chars are one TCHAR long if (!IsSingleTCHAR(first, last)) return false; if (m_allowWS && cCharEncoderUtil::IsWhiteSpace(*first)) return false; return cCharEncoderUtil::IsPrintable(*first); } bool cQuoteCharEncoder::NeedsEncoding(TSTRING::const_iterator first, TSTRING::const_iterator last) const { return (IsSingleTCHAR(first, last) && (*first == char_test)); } bool cBackslashCharEncoder::NeedsEncoding(TSTRING::const_iterator first, TSTRING::const_iterator last) const { return (IsSingleTCHAR(first, last) && (*first == char_test)); } ////////////////////////////////////////////////////////////////////////////// // ROUNDTRIP ENCODINGS ////////////////////////////////////////////////////////////////////////////// TSTRING cNonNarrowableCharEncoder::EncodeRoundtrip(TSTRING::const_iterator first, TSTRING::const_iterator last) const { TSTRING str; str += char_escape; str += char_identifier; str += cCharEncoderUtil::CharStringToHexValue(TSTRING(first, last)); return str; } TSTRING cNonPrintableCharEncoder::EncodeRoundtrip(TSTRING::const_iterator first, TSTRING::const_iterator last) const { ASSERT(IsSingleTCHAR(first, last)); // non-prints are single char (see NOTE above) TSTRING str; str += char_escape; str += char_identifier; str += cCharEncoderUtil::CharStringToHexValue(TSTRING(first, last)); return str; } TSTRING cQuoteCharEncoder::EncodeRoundtrip(TSTRING::const_iterator first, TSTRING::const_iterator last) const { // should just be a quote ASSERT(IsSingleTCHAR(first, last)); ASSERT(*first == char_test); TSTRING str; str += char_escape; str += char_identifier; return str; } TSTRING cBackslashCharEncoder::EncodeRoundtrip(TSTRING::const_iterator first, TSTRING::const_iterator last) const { // should just be a backslash ASSERT(IsSingleTCHAR(first, last)); ASSERT(*first == char_test); TSTRING str; str += char_escape; str += char_identifier; return str; } ////////////////////////////////////////////////////////////////////////////// // NON-ROUNDTRIP ENCODINGS ////////////////////////////////////////////////////////////////////////////// TSTRING cNonNarrowableCharEncoder::EncodePretty(TSTRING::const_iterator first, TSTRING::const_iterator last) const { return EncodeRoundtrip(first, last); } TSTRING cNonPrintableCharEncoder::EncodePretty(TSTRING::const_iterator first, TSTRING::const_iterator last) const { return EncodeRoundtrip(first, last); } TSTRING cQuoteCharEncoder::EncodePretty(TSTRING::const_iterator first, TSTRING::const_iterator last) const { // should just be a quote ASSERT(IsSingleTCHAR(first, last)); ASSERT(*first == char_test); return TSTRING(1, char_replace); } TSTRING cBackslashCharEncoder::EncodePretty(TSTRING::const_iterator first, TSTRING::const_iterator last) const { // should just be a backslash ASSERT(IsSingleTCHAR(first, last)); ASSERT(*first == char_test); return TSTRING(1, char_replace); } ////////////////////////////////////////////////////////////////////////////// // DECODINGS ////////////////////////////////////////////////////////////////////////////// TSTRING cNonNarrowableCharEncoder::Decode(TSTRING::const_iterator* pcur, const TSTRING::const_iterator end) const { // check preconditions if ((*pcur) >= end || *(*pcur) != Identifier()) ThrowAndAssert(eBadDecoderInput()); return (cCharEncoderUtil::DecodeHexToChar(pcur, end)); } TSTRING cNonPrintableCharEncoder::Decode(TSTRING::const_iterator* pcur, const TSTRING::const_iterator end) const { // check preconditions if ((*pcur) >= end || *(*pcur) != Identifier()) ThrowAndAssert(eBadDecoderInput()); return (cCharEncoderUtil::DecodeHexToChar(pcur, end)); } TSTRING cQuoteCharEncoder::Decode(TSTRING::const_iterator* pcur, const TSTRING::const_iterator end) const { if ((*pcur) >= end || *(*pcur) != Identifier()) ThrowAndAssert(eBadDecoderInput()); (*pcur)++; // advance past part decoded return TSTRING(1, Identifier()); } TSTRING cBackslashCharEncoder::Decode(TSTRING::const_iterator* pcur, const TSTRING::const_iterator end) const { if ((*pcur) >= end || *(*pcur) != Identifier()) ThrowAndAssert(eBadDecoderInput()); (*pcur)++; // advance past part decoded return TSTRING(1, Identifier()); } ////////////////////////////////////////////////////////////////////////////// // IDENTIFIERS ////////////////////////////////////////////////////////////////////////////// TCHAR cNonNarrowableCharEncoder::Identifier() const { return char_identifier; } TCHAR cNonPrintableCharEncoder::Identifier() const { return char_identifier; } TCHAR cQuoteCharEncoder::Identifier() const { return char_identifier; } TCHAR cBackslashCharEncoder::Identifier() const { return char_identifier; } ////////////////////////////////////////////////////////////////////////////// // UTILITIES ////////////////////////////////////////////////////////////////////////////// bool cCharEncoderUtil::IsWhiteSpace(TCHAR ch) { return (ch == '\r' || ch == '\n' || ch == '\t' || ch == '\v' || ch == ' '); } bool cCharEncoderUtil::IsPrintable(TCHAR ch) { #if USE_CLIB_LOCALE && !defined(__APPLE__) return (!isprint(ch)); // kludge for KAI #else // USE_CLIB_LOCALE return (!std::isprint(ch, std::locale())); #endif // USE_CLIB_LOCALE } TSTRING cCharEncoderUtil::CharStringToHexValue(const TSTRING& str) { TSTRING strOut; TSTRING::const_iterator at; for (at = str.begin(); at < str.end(); at++) { strOut += char_to_hex(*at); } return strOut; } TSTRING cCharEncoderUtil::HexValueToCharString(const TSTRING& str) { TSTRING strOut; TSTRING::const_iterator at; for (at = str.begin(); at < str.end(); at += TCHAR_AS_HEX__IN_TCHARS) { strOut += hex_to_char(at, at + TCHAR_AS_HEX__IN_TCHARS); } return strOut; } TCHAR cCharEncoderUtil::hex_to_char(TSTRING::const_iterator first, TSTRING::const_iterator last) { static const TCHAR max_char = std::numeric_limits::max(); static const TCHAR min_char = std::numeric_limits::min(); if (first + TCHAR_AS_HEX__IN_TCHARS != last) ThrowAndAssert(eBadHexConversion()); TISTRINGSTREAM ss(TSTRING(first, last)); ss.imbue(std::locale::classic()); ss.fill(_T('0')); ss.setf(std::ios_base::hex, std::ios_base::basefield); unsigned long ch; ss >> ch; if (ss.bad() || ss.fail()) ThrowAndAssert(eBadHexConversion(TSTRING(first, last))); if ((TCHAR)ch > max_char || (TCHAR)ch < min_char) ThrowAndAssert(eBadHexConversion(TSTRING(first, last))); return (TCHAR)ch; } TSTRING cCharEncoderUtil::char_to_hex(TCHAR ch) { TOSTRINGSTREAM ss; ss.imbue(std::locale::classic()); ss.fill(_T('0')); ss.width(TCHAR_AS_HEX__IN_TCHARS); ss.setf(std::ios_base::hex, std::ios_base::basefield); ss << tss::util::char_to_size(ch); if (ss.bad() || ss.fail() || ss.str().length() != TCHAR_AS_HEX__IN_TCHARS) ThrowAndAssert(eBadHexConversion(TSTRING(1, ch))); return ss.str(); } TSTRING cCharEncoderUtil::DecodeHexToChar(TSTRING::const_iterator* pcur, const TSTRING::const_iterator end) { // get hex numbers -- 2 chars TSTRING str; size_t n = 0; for ((*pcur)++; n < TCHAR_AS_HEX__IN_TCHARS && (*pcur) != end; n++, (*pcur)++) { str += *(*pcur); } if (n != TCHAR_AS_HEX__IN_TCHARS) ThrowAndAssert(eBadDecoderInput()); // convert hex numbers return HexValueToCharString(str); } ////////////////////////////////////////////////////////////////////////////// // ENCODER MEMBERS ////////////////////////////////////////////////////////////////////////////// cEncoder::cEncoder(int e, int f) : m_fFlags(f) { // add encodings if (e & NON_NARROWABLE) m_encodings.push_back(new cNonNarrowableCharEncoder); if (e & NON_PRINTABLE) m_encodings.push_back(new cNonPrintableCharEncoder(AllowWhiteSpace())); if (e & BACKSLASH) m_encodings.push_back(new cBackslashCharEncoder); if (e & DBL_QUOTE) m_encodings.push_back(new cQuoteCharEncoder); // assert that we weren't passed anything freaky ASSERT(0 == (e & ~(NON_NARROWABLE | NON_PRINTABLE | BACKSLASH | DBL_QUOTE))); // add flags ASSERT(!((m_fFlags & ROUNDTRIP) && (m_fFlags & NON_ROUNDTRIP))); #ifdef TSS_DO_SCHEMA_VALIDATION // check assumptions about encodings ValidateSchema(); #endif } cEncoder::~cEncoder() { sack_type::iterator itr; for (itr = m_encodings.begin(); itr != m_encodings.end(); ++itr) delete *itr; } bool cEncoder::RoundTrip() const { return (0 != (m_fFlags & ROUNDTRIP)); } bool cEncoder::AllowWhiteSpace() const { return (0 != (m_fFlags & ALLOW_WHITESPACE)); } ////////////////////////////////////////////////////////////////////////////// // ENCODER BASIC FUNCTIONALITY ////////////////////////////////////////////////////////////////////////////// void cEncoder::Encode(TSTRING& strIn) const { // TODO:BAM -- reserve space for strOut as an optimization? TSTRING strOut; // encoded string we will build up TSTRING::const_iterator cur = strIn.begin(); // pointer to working position in strIn const TSTRING::const_iterator end = strIn.end(); // end of strIn TSTRING::const_iterator first = end; // identifies beginning of current character TSTRING::const_iterator last = end; // identifies end of current character // while get next char (updates cur) while (cCharUtil::PopNextChar(cur, end, first, last)) { bool fCharEncoded = false; // answers: did char need encoding? sack_type::const_iterator atE; // for all encoders for (atE = m_encodings.begin(); atE != m_encodings.end(); atE++) { // does char need encoding? if ((*atE)->NeedsEncoding(first, last)) { strOut += Encode(first, last, atE); fCharEncoded = true; break; // each char should only fail at most one // encoding test, so it should be cool to quit } } if (!fCharEncoded) { strOut.append(first, last); // simply add current char to output since it needed no encoding } } // pass back encoded string strIn = strOut; } TSTRING cEncoder::Encode(TSTRING::const_iterator first, TSTRING::const_iterator last, sack_type::const_iterator encoding) const { // encode it if (RoundTrip()) return (*encoding)->EncodeRoundtrip(first, last); else return (*encoding)->EncodePretty(first, last); } void cEncoder::Decode(TSTRING& strIn) const { // TODO:BAM -- reserve space for strOut as an optimization? TSTRING strOut; // decoded string we will build up TSTRING::const_iterator cur = strIn.begin(); // pointer to working position in strIn const TSTRING::const_iterator end = strIn.end(); // end of strIn TSTRING::const_iterator first = end; // identifies beginning of current character TSTRING::const_iterator last = end; // identifies end of current character // while get next char (updates cur) while (cCharUtil::PopNextChar(cur, end, first, last)) { // is this char the escape character? if (IsSingleTCHAR(first, last) && *first == iCharEncoder::EscapeChar()) { // get to identifier if (!cCharUtil::PopNextChar(cur, end, first, last)) ThrowAndAssert(eBadDecoderInput()); // this algorithm assumes that all identifiers are single char // so anything following the escape char should be a // single-char identifier if (!IsSingleTCHAR(first, last)) THROW_INTERNAL("displayencoder.cpp"); // determine to which encoding the identifier belongs bool fFoundEncoding = false; sack_type::const_iterator atE; for (atE = m_encodings.begin(); atE != m_encodings.end(); atE++) { // is this the right encoding? if (*first == (*atE)->Identifier()) { // this is the correct encoding.... fFoundEncoding = true; // ...so decode char strOut += (*atE)->Decode(&first, end); // should modify cur cur = first; // advance current char pointer break; // no need to run other tests after // this because all identifiers should be unique } } if (!fFoundEncoding) ThrowAndAssert(eUnknownEscapeEncoding(TSTRING(1, *first))); } else { strOut.append(first, last); } } strIn = strOut; } ////////////////////////////////////////////////////////////////////////////// // ENCODER SCHEMA VALIDATION ////////////////////////////////////////////////////////////////////////////// void cEncoder::ValidateSchema() const { ASSERT(OnlyOneCatagoryPerChar()); ASSERT(AllIdentifiersUnique()); } // only tests single TCHAR characters (but of those, tests all of them) bool cEncoder::OnlyOneCatagoryPerChar() const { // TODO:BAM - man, is there a better way to do this? TCHAR ch = std::numeric_limits::min(); TSTRING ach(1, ch); if (ch != std::numeric_limits::max()) { do { bool fFailedATest = false; ach[0] = ch; for (sack_type::const_iterator atE = m_encodings.begin(); atE != m_encodings.end(); atE++) { if ((*atE)->NeedsEncoding(ach.begin(), ach.end())) { if (fFailedATest) return false; // each char can only fail one test else fFailedATest = true; } } ch++; } while (ch != std::numeric_limits::max()); } return true; } bool cEncoder::AllIdentifiersUnique() const { TSTRING chars; for (sack_type::const_iterator atE = m_encodings.begin(); atE != m_encodings.end(); atE++) { TCHAR chID = (*atE)->Identifier(); if (chars.find(chID) == TSTRING::npos) chars += chID; else return false; } return true; } bool cEncoder::AllTestsRunOnEncodedString(const TSTRING& s) const { TSTRING::const_iterator cur = s.begin(); // pointer to working position in s const TSTRING::const_iterator end = s.end(); // end of s TSTRING::const_iterator first = end; // identifies beginning of current character TSTRING::const_iterator last = end; // identifies end of current character // while get next char (updates cur) while (cCharUtil::PopNextChar(cur, end, first, last)) { sack_type::const_iterator atE; for (atE = m_encodings.begin(); atE != m_encodings.end(); atE++) { if ((*atE)->NeedsEncoding(first, last)) { return false; } } } return true; } ////////////////////////////////////////////////////////////////////////////// // cDisplayEncoder MEMBERS ////////////////////////////////////////////////////////////////////////////// cDisplayEncoder::cDisplayEncoder(Flags f) : cEncoder(NON_NARROWABLE | NON_PRINTABLE | BACKSLASH | DBL_QUOTE, f) { } void cDisplayEncoder::Encode(TSTRING& str) const { cEncoder::Encode(str); } bool cDisplayEncoder::Decode(TSTRING& str) const { cEncoder::Decode(str); return true; // TODO:BAM -- throw error! }