diff --git utils/text/operations.cpp utils/text/operations.cpp
index 736e7f3..5a4345d 100644
--- utils/text/operations.cpp
+++ utils/text/operations.cpp
@@ -38,6 +38,9 @@ namespace text = utils::text;
 
 /// Replaces XML special characters from an input string.
 ///
+/// The list of XML special characters is specified here:
+///     http://www.w3.org/TR/xml11/#charsets
+///
 /// \param in The input to quote.
 ///
 /// \return A quoted string without any XML special characters.
@@ -46,25 +49,34 @@ text::escape_xml(const std::string& in)
 {
     std::ostringstream quoted;
 
-    const char* delims = "\"&<>'"; // Keep in sync with 'switch' below.
-    std::string::size_type start_pos = 0;
-    std::string::size_type last_pos = in.find_first_of(delims);
-    while (last_pos != std::string::npos) {
-        quoted << in.substr(start_pos, last_pos - start_pos);
-        switch (in[last_pos]) {
-        case '"': quoted << "&quot;"; break;
-        case '&': quoted << "&amp;"; break;
-        case '<': quoted << "&lt;"; break;
-        case '>': quoted << "&gt;"; break;
-        case '\'': quoted << "&apos;"; break;
-        default: UNREACHABLE;
+    for (std::string::const_iterator it = in.begin();
+         it != in.end(); ++it) {
+        const unsigned char c = static_cast< unsigned char >(*it);
+        if (c == '"') {
+            quoted << "&quot;";
+        } else if (c == '&') {
+            quoted << "&amp;";
+        } else if (c == '<') {
+            quoted << "&lt;";
+        } else if (c == '>') {
+            quoted << "&gt;";
+        } else if (c == '\'') {
+            quoted << "&apos;";
+        } else if ((c >= 0x01 && c <= 0x08) ||
+                   (c >= 0x0B && c <= 0x0C) ||
+                   (c >= 0x0E && c <= 0x1F) ||
+                   (c >= 0x7F && c <= 0x84) ||
+                   (c >= 0x86 && c <= 0x9F)) {
+            // RestrictedChar (XML 1.1) bytes are emitted as numeric
+            // character references, '&#[decimal value];'.  Use the
+            // unsigned 'c' here: a plain 'char' may be signed, and
+            // bytes >= 0x80 would be sign-extended into huge numbers.
+            quoted << "&#" << static_cast< unsigned int >(c)
+                   << ";";
+        } else {
+            quoted << *it;
         }
-        start_pos = last_pos + 1;
-        last_pos = in.find_first_of(delims, start_pos);
     }
-    if (start_pos < in.length())
-        quoted << in.substr(start_pos);
-
     return quoted.str();
 }
 
diff --git utils/text/operations_test.cpp utils/text/operations_test.cpp
index 769b7d4..2d5ab36 100644
--- utils/text/operations_test.cpp
+++ utils/text/operations_test.cpp
@@ -77,6 +77,7 @@ ATF_TEST_CASE_BODY(escape_xml__no_escaping)
 {
     ATF_REQUIRE_EQ("a", text::escape_xml("a"));
     ATF_REQUIRE_EQ("Some text!", text::escape_xml("Some text!"));
+    ATF_REQUIRE_EQ("\n\t\r", text::escape_xml("\n\t\r"));
 }
 
 
@@ -90,6 +91,9 @@ ATF_TEST_CASE_BODY(escape_xml__some_escaping)
     ATF_REQUIRE_EQ("&quot;&amp;&lt;&gt;&apos;", text::escape_xml("\"&<>'"));
 
     ATF_REQUIRE_EQ("&amp;&amp;&amp;", text::escape_xml("&&&"));
+    ATF_REQUIRE_EQ("&#8;&#11;", text::escape_xml("\b\v"));
+    ATF_REQUIRE_EQ("\t&#127;BAR&amp;", text::escape_xml("\t\x7f""BAR&"));
+    ATF_REQUIRE_EQ("&#128;", text::escape_xml("\x80"));
 }
 
 