34 #include <corelib/ncbisys.hpp>
38 #define NCBI_USE_ERRCODE_X Html_Lib
68 while ( ptr !=
NPOS ) {
81 bool is_entity =
false;
84 (ptr+2 <
str.length()) &&
85 (semicolon !=
NPOS)) {
86 if ( ptr >= semicolon )
87 semicolon =
str.find(
";", ptr+1);
88 if ( semicolon !=
NPOS ) {
91 if (
str[ptr+1] ==
'#') {
96 for (; p < semicolon; ++p) {
106 for (; p < semicolon; ++p) {
113 is_entity = (p == semicolon);
119 "\" contains HTML encoded entities");
165 while ( (pos =
s.find(
"<!--", pos)) !=
NPOS ) {
167 if ( pos_end ==
NPOS ) {
170 s.erase(pos, pos_end - pos + 3);
174 while ( (pos =
s.find(
"<@", pos)) !=
NPOS ) {
176 if ( pos_end ==
NPOS ) {
179 s.erase(pos, pos_end - pos + 2);
184 while ( (pos =
s.find(
"<", pos)) !=
NPOS ) {
186 if ( pos_end ==
NPOS ) {
189 if (pos <
s.size() &&
190 (
isalpha((
unsigned char)
s[pos + 1]) ||
s[pos + 1] ==
'/' )) {
191 s.erase(pos, pos_end - pos + 1);
206 while ( (pos =
s.find(
"&", pos)) !=
NPOS ) {
208 if ( pos_end ==
NPOS ) {
211 if ( (pos_end - pos) > 2 && (pos_end - pos) < 8 ) {
214 if (
s[start] ==
'#') {
220 bool need_delete =
true;
228 s.erase(pos, pos_end - pos + 1);
514 "Unable to guess the source string encoding", 0);
518 ustr.reserve(
str.size());
520 string::const_iterator
i, e =
str.end();
524 for (
i =
str.begin();
i != e;) {
527 if (
i != e && ch ==
'&') {
528 string::const_iterator itmp, end_of_entity, start_of_entity;
529 itmp = end_of_entity = start_of_entity =
i;
530 bool ent, dec,
hex, parsed=
false;
531 ent =
isalpha((
unsigned char)(*itmp)) != 0;
532 dec = !ent && *itmp ==
'#' && ++itmp != e &&
533 isdigit((
unsigned char)(*itmp)) != 0;
534 hex = !dec && itmp != e &&
535 (*itmp ==
'x' || *itmp ==
'X') && ++itmp != e &&
536 isxdigit((
unsigned char)(*itmp)) != 0;
537 start_of_entity = itmp;
538 if (itmp != e && (ent || dec ||
hex)) {
540 for (
int len=0;
len<16 && itmp != e; ++
len, ++itmp) {
541 if (*itmp ==
'&' || *itmp ==
'#') {
545 end_of_entity = itmp;
548 ent = ent &&
isalnum( (
unsigned char)(*itmp)) != 0;
549 dec = dec &&
isdigit( (
unsigned char)(*itmp)) != 0;
552 if (end_of_entity !=
i && (ent || dec ||
hex)) {
555 string entity(start_of_entity,end_of_entity);
557 for ( ; p->
u != 0; ++p) {
558 if (entity.compare(p->
s) == 0) {
568 for (itmp = start_of_entity;
569 itmp != end_of_entity; ++itmp) {
574 if (
ud >=
'0' &&
ud <=
'9') {
576 }
else if (
ud >=
'a' &&
ud <=
'f') {
579 }
else if (
ud >=
'A' &&
ud <=
'F') {
597 ustr.append( 1, ch );
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
std::ofstream out("events_result.xml")
main entry point for tests
#define ERR_POST_X_ONCE(err_subcode, message)
Error posting only once during program execution with default error code and given error subcode.
#define NCBI_THROW2(exception_class, err_code, message, extra)
Throw exception with extra parameter.
void Info(CExceptionArgs_Base &args)
static void SetNL(const string &nl)
static const char * sm_newline
static string HTMLEncode(const string &str, THTMLEncodeFlags flags=fEncodeAll)
HTML encodes a string. E.g. "<" is encoded as "&lt;".
static CStringUTF8 HTMLDecode(const string &str, EEncoding encoding=eEncoding_Unknown, THTMLDecodeFlags *result_flags=NULL)
Decode HTML entities and character references.
static string StripSpecialChars(const string &str)
Strip all named and numeric character entities from a string.
static string StripTags(const string &str)
Strip all HTML tags from a string.
static string HTMLAttributeEncode(const string &str, THTMLEncodeFlags flags=fSkipEntities)
HTML encodes a tag attribute ('&' and '"' symbols).
@ fSkipLiteralEntities
Skip "&entity;".
@ fSkipNumericEntities
Skip "&#NNNN;".
@ fCheckPreencoded
Print warning if some preencoded entity found in the string.
@ fCharRef_Entity
Character entity reference(s) were found.
@ fCharRef_Numeric
Numeric character reference(s) were found.
@ fEncoding
Character encoding changed.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
NCBI_NS_STD::string::size_type SIZE_TYPE
Uint4 TUnicodeSymbol
Unicode character.
static EEncoding GuessEncoding(const CTempString &src)
Guess the encoding of the C/C++ string.
static CStringUTF8 AsUTF8(const CTempString &src, EEncoding encoding, EValidate validate=eNoValidate)
Convert into UTF8 from a C/C++ string.
unsigned int
A callback function used to compare two keys in a database.
static struct tag_HtmlEntities s_HtmlEntities[]
static string s_HTMLEncode(const string &str, const string &set, CHTMLHelper::THTMLEncodeFlags flags)
Definition of all error codes used in html library (xhtml.lib).
static void hex(unsigned char c)
static unsigned int ud(time_t one, time_t two)
#define NcbiSysChar_strdup
static const char * str(char *buf, int n)