CPP_DOC/doxyhtml/objutil_8cpp_source.html

 /*  $Id: objutil.cpp 100324 2023-07-20 14:30:16Z vasilche $

 * ===========================================================================

 *

 *                            PUBLIC DOMAIN NOTICE

 *               National Center for Biotechnology Information

 *

 *  This software/database is a "United States Government Work" under the

 *  terms of the United States Copyright Act.  It was written as part of

 *  the author's official duties as a United States Government employee and

 *  thus cannot be copyrighted.  This software/database is freely available

 *  to the public for use. The National Library of Medicine and the U.S.

 *  Government have not placed any restriction on its use or reproduction.

 *

 *  Although all reasonable efforts have been taken to ensure the accuracy

 *  and reliability of the software and data, the NLM and the U.S.

 *  Government do not and cannot warrant the performance or results that

 *  may be obtained by using this software or data. The NLM and the U.S.

 *  Government disclaim all warranties, express or implied, including

 *  warranties of performance, merchantability or fitness for any particular

 *  purpose.

 *

 *  Please cite the author in any work or product based on this material.

 *

 * ===========================================================================

 *

 * Author:  Mati Shomrat, NCBI

 *

 * File Description:

 *   shared utility functions

 *

 */

 #include <ncbi_pch.hpp>

 #include <corelib/ncbistd.hpp>


 #include <util/strsearch.hpp>


 #include <objects/general/Date.hpp>

 #include <objects/general/User_object.hpp>

 #include <objects/general/User_field.hpp>

 #include <objects/general/Object_id.hpp>

 #include <objects/general/Date.hpp>

 #include <objects/seq/Bioseq.hpp>

 #include <objects/seq/Seq_inst.hpp>

 #include <objects/seq/Seq_ext.hpp>

 #include <objects/seq/Delta_ext.hpp>

 #include <objects/seq/Delta_seq.hpp>

 #include <objects/seq/Seq_literal.hpp>

 #include <objects/seq/MolInfo.hpp>

 #include <objects/seq/seqport_util.hpp>

 #include <objects/seqloc/Seq_loc.hpp>

 #include <objmgr/scope.hpp>

 #include <objmgr/bioseq_handle.hpp>

 #include <objmgr/seqdesc_ci.hpp>

 #include <objmgr/object_manager.hpp>

 #include <objmgr/util/sequence.hpp>

 #include <objects/general/general_macros.hpp>

 #include <algorithm>

 #include <objmgr/util/objutil.hpp>


 BEGIN_NCBI_SCOPE

 BEGIN_SCOPE(objects)


 // Characters that IsPartOfUrl() accepts in the path segment it scans right
 // after the position it is asked about (digits, ASCII letters, '_', '-', '.').
 SAFE_CONST_STATIC_STRING(kLegalPathChars, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_-.");


 // Decides whether the character at index `pos` of `sentence` (ExpandTildes
 // passes the index of a '~') belongs to a URL of the shape
 //     <protocol>...  /~<path chars>/
 // a pattern inherited from the C toolkit flat file generator.
 //
 // All of the following must hold for the answer to be true:
 //   - pos is a valid index and the character there is not one of "( \t\r\n";
 //   - the character just before pos is '/';
 //   - the "word" around pos (it starts after the last '(', whitespace or '~'
 //     found at or before pos-1), taken up to and including its first ':',
 //     is exactly "URL:", "http:" or "https:";
 //   - pos is followed by at least one character from kLegalPathChars, and
 //     the first character after that run which is not legal is a '/'.
 bool IsPartOfUrl(const string& sentence, size_t pos)
 {
     const string word_breaks( "( \t\r\n" );
     const string& legal_path_chars = kLegalPathChars.Get();
     const size_t  length           = sentence.length();

     // Nonsensical input: empty text or a position outside of it.
     if ( sentence.empty()  ||  pos >= length ) {
         return false;
     }
     // A separator is never part of a URL.
     if ( word_breaks.find( sentence[ pos ] ) != string::npos ) {
         return false;
     }
     // Cheap test first: the character must directly follow a slash.
     if ( pos == 0  ||  sentence[ pos - 1 ] != '/' ) {
         return false;
     }

     // Locate the start of the word surrounding pos; besides the ordinary
     // separators an earlier tilde also starts a new word.
     const string::size_type last_break =
         sentence.find_last_of( "( \t\r\n~", pos - 1 );
     const string::size_type word_start =
         ( last_break == string::npos ) ? 0 : last_break + 1;

     // The word has to open with a protocol specifier we approve of.
     static const char* sc_ProtocolSpecifiers[] = {
       "URL:",
       "http:",
       "https:",
     };
     DEFINE_STATIC_ARRAY_MAP_WITH_COPY(CStaticArraySet<string>, vProtocolSpecifiers, sc_ProtocolSpecifiers);

     const size_t colon = sentence.find( ':', word_start );
     if ( colon == string::npos ) {
         return false;
     }
     const string protocol = sentence.substr( word_start, colon - word_start + 1 );
     if ( vProtocolSpecifiers.find( protocol ) == vProtocolSpecifiers.end() ) {
         return false;
     }

     // At least one legal path character has to follow pos ...
     size_t cursor = pos + 1;
     if ( cursor >= length  ||
          legal_path_chars.find( sentence[ cursor ] ) == string::npos ) {
         return false;
     }
     // ... and the run of legal characters has to be closed by a slash.
     for ( ++cursor;  cursor < length;  ++cursor ) {
         const char ch = sentence[ cursor ];
         if ( legal_path_chars.find( ch ) == string::npos ) {
             return ch == '/';
         }
     }

     return false; // ran off the end without seeing the terminating '/'
 }


 static bool s_RunOfStars(string& s, SIZE_TYPE start, SIZE_TYPE length)

 {

     SIZE_TYPE max = start + 66;

     if (max >= length) {

         return false;

     }

     for (SIZE_TYPE i = start; i < max; i++) {

         if (s[i] != '*') {

             return false;

         }

     }

     return true;

 }


 void ExpandTildes(string& s, ETildeStyle style)

 {

     if ( style == eTilde_tilde ) {

         return;

     }


     SIZE_TYPE start = 0, tilde, length = s.length();


     tilde = s.find('~', start);

     if (tilde == NPOS) {  // no tilde

         return;

     }


     string result;


     while ( (start < length)  &&  (tilde = s.find('~', start)) != NPOS ) {

         result.append(s, start, tilde - start);

         char next = (tilde + 1) < length ? s[tilde + 1] : 0;

         switch ( style ) {

         case eTilde_space:

             if ( (tilde + 1 < length  &&  isdigit((unsigned char) next) )  ||

                  (tilde + 2 < length  &&  (next == ' '  ||  next == '(')  &&

                   isdigit((unsigned char) s[tilde + 2]))) {

                 result += '~';

             } else {

                 result += ' ';

             }

             start = tilde + 1;

             break;


         case eTilde_newline:

             if ( tilde + 1 < length  &&  s[tilde + 1] == '~' ) {

                 result += '~';

                 start = tilde + 2;

             } else {

                 result += "\n";

                 start = tilde + 1;

             }

             break;


         case eTilde_note:

             if ( tilde + 1 < length  &&  s[tilde + 1] == '~' ) {

                 result += '~';

                 start = tilde + 2;

             } else {

                 // plain "~" expands to ";\n", unless it's after a space or semi-colon, in

                 // which case it becomes a plain "\n"

                 char prevChar = ( tilde >= 1 ? s[tilde - 1] : '\0' );


                 if( ' ' == prevChar || ';' == prevChar ) {

                     result += '\n';

                 } else {

                     result += ";\n";

                 }

                 start = tilde + 1;

             }

             break;


         case eTilde_comment:

             if (tilde > 0  &&  s[tilde - 1] == '`') {

                 result.replace(result.length() - 1, 1, 1,'~');

             }

             else if ( IsPartOfUrl( s, tilde ) ) {

                 result += '~';

             }

             else {

                 result += "\n";

             }

             start = tilde + 1;

             if (s[start] == ' ' && s_RunOfStars(s, start+1, length)) {

               start++;

               result += '\n';

             }

             break;


         default: // just keep it, for lack of better ideas

             result += '~';

             start = tilde + 1;

             break;

         }

     }

     if (start < length) {

         result.append(s, start, NPOS);

     }

     s.swap(result);

 }


 // Turns every double quote in str into a single quote, in place.
 void ConvertQuotes(string& str)
 {
     for (char& ch : str) {
         if (ch == '\"') {
             ch = '\'';
         }
     }
 }


 // Same conversion on a copy: returns str with every double quote replaced by
 // a single quote; the argument itself is not modified.
 string ConvertQuotes(const string& str)
 {
     string converted(str);
     ConvertQuotes(converted);
     return converted;
 }


 // Normalizes blanks in str, in place:
 //   - every tab is replaced by a space;
 //   - the two-space -> one-space replacement is repeated until a pass no
 //     longer shortens the string;
 //   - "( " is replaced by "(", " )" by ")" and " ," by ",".
 // Returns true when the length of str changed.  A string in which only tabs
 // were swapped for spaces therefore yields false; so does an empty string.
 bool StripSpaces(string& str)
 {
     if (str.empty()) {
         return false;
     }
     const auto length_before = str.length();

     NStr::ReplaceInPlace(str, "\t", " ");

     // keep collapsing double spaces for as long as that makes a difference
     string::size_type length_seen;
     do {
         length_seen = str.length();
         NStr::ReplaceInPlace(str, "  ", " ");
     } while (str.length() != length_seen);

     // no blank right after '(' nor right before ')' or ','
     NStr::ReplaceInPlace(str, "( ", "(");
     NStr::ReplaceInPlace(str, " )", ")");
     NStr::ReplaceInPlace(str, " ,", ",");

     return length_before != str.length();
 }


 // Removes one trailing '.' from str, if there is one.
 //
 // When keep_ellipsis is set and str ends in "...", str is left alone.
 // Returns true if a period was removed, false otherwise.
 //
 // NB: smarter variants (also trimming other trailing punctuation, or
 // shortening long runs of periods to an ellipsis) were tried here before but
 // are not used because the C toolkit behaves differently.
 bool RemovePeriodFromEnd(string& str, bool keep_ellipsis)
 {
     // nothing to chop unless the very last character is a period
     if ( str.empty()  ||  str.back() != '.' ) {
         return false;
     }

     // an ellipsis is preserved on request
     const string::size_type len = str.length();
     if ( keep_ellipsis  &&  len >= 3  &&  str.compare(len - 3, 3, "...") == 0 ) {
         return false;
     }

     str.pop_back();
     return true;
 }


 // Makes str end in exactly one period: any trailing run of spaces, tabs,
 // tildes, periods and newlines is removed first, then '.' is appended.
 // A string made up solely of such characters (or an empty one) becomes ".".
 void AddPeriod(string& str)
 {
     const string::size_type last_kept = str.find_last_not_of(" \t~.\n");
     if (last_kept == string::npos) {
         str.clear();
     } else {
         str.resize(last_kept + 1);
     }
     str.push_back('.');
 }


 // Removes trailing whitespace (as classified by isspace) from str, in place,
 // but never touches the first `indent` characters.
 //
 // A string no longer than `indent` is left unchanged.  If everything from
 // index `indent` onwards is whitespace, str is cut down to `indent`
 // characters.
 void TrimSpaces(string& str, size_t indent)
 {
     if (str.length() <= indent) {   // also covers the empty string
         return;
     }

     // `keep` is the number of leading characters that survive.  Counting
     // characters rather than tracking the index of the last kept character
     // means the value never has to drop below zero, so an all-whitespace
     // string with indent == 0 cannot wrap around and index out of bounds.
     size_t keep = str.length();
     while (keep > indent  &&  isspace((unsigned char) str[keep - 1])) {
         --keep;
     }
     str.erase(keep);
 }


 // Predicate: true for characters that isgraph() classifies as visible.
 // Needed because not all compilers will just let you pass "isgraph" to STL
 // find_if.  The argument is converted to unsigned char first, as the <cctype>
 // functions require; operator() is const so that const instances can be used.
 class CIsGraph
 {
 public:
     bool operator()( const char c ) const {
         return isgraph((unsigned char)c) != 0;
     }
 };


 // Compresses every run of characters that are not visible (spaces, control
 // characters, anything isgraph() rejects) into one single space.
 // If trim_beginning, strips all such characters from the beginning of the
 // string; if trim_end, strips them from the end of the string.
 // A string left with no visible character after trimming becomes empty.
 // Works in place and returns the string you gave it.
 string& CompressSpaces( string& str, const bool trim_beginning, const bool trim_end )
 {
     if( str.empty() ) {
         return str;
     }

     const CIsGraph is_graph = CIsGraph();

     // set up start_iter and end_iter to determine the range we copy from
     string::iterator start_iter = str.begin();
     if( trim_beginning ) {
         start_iter = find_if( str.begin(), str.end(), is_graph );
     }
     if( str.end() == start_iter ) {
         str.clear();
         return str;
     }

     string::iterator end_iter = str.end();
     if( trim_end ) {
         // base() of the reverse iterator is one past the last visible char
         end_iter = find_if( str.rbegin(), str.rend(), is_graph ).base();
     }
     if( str.begin() == end_iter ) {
         str.clear();
         return str;
     }

     // The main part, where we compress spaces
     string newstr; // result will end up here
     newstr.reserve( end_iter - start_iter );

     // Starts out true so that a leading invisible run (when the beginning is
     // not trimmed) still produces its single space.
     bool last_ch_was_printable = true;
     for( string::iterator iter = start_iter; iter < end_iter; ++iter ) {
         const char ch = *iter;
         if( is_graph(ch) ) {
             // visible characters get copied straight
             newstr += ch;
             last_ch_was_printable = true;
         } else {
             // anything else becomes a space, and it is only appended if the
             // last char was visible
             if( last_ch_was_printable ) {
                 newstr += ' ';
             }
             last_ch_was_printable = false;
         }
     }

     str.swap( newstr );
     return str;
 }


 // Trims leading whitespace and trailing "junk" from str, in place.
 //
 // Trailing junk is the run, at the very end of str, of characters that
 // compare <= ' ' or are one of '.', ',', '~', ';'.  That run is removed,
 // except that:
 //   - a ';' that starts the junk and closes an HTML escape such as "&bgr;"
 //     (an '&' found within the preceding 19 characters, with nothing but
 //     alphanumerics or '#' in between) is not removed;
 //   - if the junk contained a period, "." is put back -- or "..." when
 //     allow_ellipsis is set and the second and third junk characters are
 //     both periods;
 //   - otherwise, if the junk starts with '~', "~" (or "~~" when a second
 //     tilde follows directly) is put back.
 // Afterwards all leading characters that compare <= ' ' are erased.
 //
 // NOTE(review): these comparisons are made on plain char; where char is
 // signed, bytes >= 0x80 also compare <= ' ' -- confirm this is intended.
 //
 // returns true if it changed the string
 bool TrimSpacesAndJunkFromEnds(string& str, bool allow_ellipsis)
 {
     // TODO: This commented out code represents how ellipsis trimming
     // should work.  However, for compatibility with C, we're using a
     // (in my opinion) suboptimal algorithm.  We can switch over later.

     //if (str.empty()) {
     //    return;
     //}

     //size_t strlen = str.length();
     //size_t begin = 0;

     //// trim unprintable characters (and space) off the beginning
     //while (begin != strlen) {
     //    unsigned char ch = str[begin];
     //    if (ch > ' ') {
     //        break;
     //    } else {
     //        ++begin;
     //    }
     //}

     //// we're done if we trimmed the string to nothing
     //if (begin == strlen) {
     //    str.erase();
     //    return;
     //}

     //// trim junk off the end (while we're at it, record whether we're chopping off a period)
     //size_t end = strlen - 1;
     //bool has_period = false;
     //while (end > begin) {
     //    unsigned char ch = str[end];
     //    if (ch <= ' '  ||  ch == '.'  ||  ch ==  ','  ||  ch == '~'  ||  ch == ';') {
     //        has_period = (has_period  ||  ch == '.');
     //        --end;
     //    } else {
     //        break;
     //    }
     //}

     //// check whether we're about to chop off an ellipsis, so we remember to add it back
     //// TODO: There's got to be a more efficient way of doing this
     //const bool weChoppedOffAnEllipsis = ( NPOS != NStr::Find(str, "...", end) );

     //// do the actual chopping here
     //str = str.substr( begin, end + 1 );

     //// restore chopped off ellipsis or period, if any
     //if ( allow_ellipsis && weChoppedOffAnEllipsis ) {
     //    str += "...";
     //} else if (has_period) {
     //    // re-add any periods if we had one before
     //    str += '.';
     //}

     // This is based on the C function TrimSpacesAndJunkFromEnds.
     // Although it's updated to return whether it changed the string,
     // it should have the same output, except:
     // - We do NOT chop off a semicolon if we determine that it's
     //   part of an HTML escape char (e.g. "&bgr;" ).
     // - There are some changes in how tildes are handled;
     //   this algo is less likely to remove them.

     if ( str.empty() ) {
         return false;
     }

     // make start_of_junk_pos hold the beginning of the "junk" at the end
     // (where junk is defined as one of several characters)
     // while we're at it, also check if the junk contains a tilde and/or period
     bool isPeriod = false;
     bool isTilde = false;
     int start_of_junk_pos = (int)str.length() - 1;
     for( ; start_of_junk_pos >= 0 ; --start_of_junk_pos ) {
         const char ch = str[start_of_junk_pos];
         if (ch <= ' ' || ch == '.' || ch == ',' || ch == '~' || ch == ';') {
             // found junk character;
             // keep track of whether the junk includes a period and/or tilde
             isPeriod = (isPeriod || ch == '.');
             isTilde = (isTilde || ch == '~');
         } else {
             // found non-junk character.  Last junk character is just after this
             ++start_of_junk_pos;
             break;
         }
     }
     // special case of the whole string being junk
     if( start_of_junk_pos < 0 ) {
         start_of_junk_pos = 0;
     }

     // check for ';' that's part of an HTML escape char like "&bgr;" and
     // skip over it (i.e., don't remove it) if so
     if( start_of_junk_pos < (int)str.length() && str[start_of_junk_pos] == ';' ) {
         // we assume no HTML escape char will be longer than this
         static const int kMaxCharsToLookAt = 20;

         // go backwards, looking for the ampersand
         int amp_iter = (start_of_junk_pos - 1);
         for( ; amp_iter >= 0 && ((start_of_junk_pos - amp_iter) < kMaxCharsToLookAt); --amp_iter ) {
             const char ch = str[amp_iter];
             // isalnum() must not be handed a negative value, hence the cast
             if( isalnum((unsigned char) ch) || ch == '#' ) {
                 // just keep going
             } else if( ch == '&' ) {
                 // The semicolon ends an HTML escape character, so we skip it
                 ++start_of_junk_pos;
                 break;
             } else {
                 // The semicolon does NOT end an HTML escape character, so we might remove it
                 break;
             }
         }
     }

     bool changed = false;

     // if there's junk, chop it off (but leave period/tildes/ellipsis as appropriate)
     if ( start_of_junk_pos < (int)str.length() ) {

         // holds the suffix to add after we remove the junk
         const char * suffix = ""; // by default, just remove junk

         const int chars_in_junk = ( (int)str.length() - start_of_junk_pos );
         _ASSERT( chars_in_junk >= 1 );

         // allow one period at end
         if (isPeriod) {
             // check if we should put an ellipsis, or just a period
             const bool putEllipsis = ( allow_ellipsis && (chars_in_junk >= 3) &&
                 str[start_of_junk_pos+1] == '.' && str[start_of_junk_pos+2] == '.' );

             suffix = ( putEllipsis ? "..." : "." );
         } else if (isTilde ) {
             // allow tilde(s)
             // (This should work on single- AND double-tildes because
             // we don't know whether or not tilde-expansion was called before this
             // point )
             if ( str[start_of_junk_pos] == '~' ) {
                 const bool doubleTilde = ( (chars_in_junk >= 2) && str[start_of_junk_pos+1] == '~' );
                 suffix = ( doubleTilde  ? "~~" : "~" );
             }
         }
         if( suffix[0] != '\0' ) {
             // only touch the string if the junk is not already exactly the suffix
             if( 0 != str.compare( start_of_junk_pos, INT_MAX, suffix) ) {
                 str.erase( start_of_junk_pos );
                 str += suffix;
                 changed = true;
             }
         } else if ( start_of_junk_pos < (int)str.length() ) {
             str.erase( start_of_junk_pos );
             changed = true;
         }
     }

     // drop the initial whitespace
     string::iterator input_iter = str.begin();
     while ( input_iter != str.end() && *input_iter <= ' ') {
         ++input_iter;
     }
     if( input_iter != str.begin() ) {
         str.erase( str.begin(), input_iter );
         changed = true;
     }

     return changed;
 }


 // Variant of the in-place TrimSpacesAndJunkFromEnds (copy-pasted from it and
 // optimized to use CTempString): reads from str and stores the trimmed text
 // in result instead of modifying its input.
 //
 // The trailing run of characters that compare <= ' ' or are one of
 // '.', ',', '~', ';' is dropped, except that a ';' closing an HTML escape
 // such as "&bgr;" is kept, and that ".", "..." (only with allow_ellipsis),
 // "~" or "~~" may be put back as a suffix.  Leading characters that compare
 // <= ' ' are dropped as well.  An empty str clears result.
 //
 // TODO: the ellipsis handling deliberately mirrors the C toolkit function of
 // the same name for compatibility, even though a better algorithm is known.
 void TrimSpacesAndJunkFromEnds(string& result, const CTempString& str, bool allow_ellipsis)
 {
     if (str.empty()) {
         result.clear();
         return;
     }

     // Walk backwards over the trailing junk, remembering whether it holds a
     // period and/or a tilde.  junk_begin ends up one past the last non-junk
     // character, or stays 0 when the whole string is junk.
     bool isPeriod = false;
     bool isTilde = false;
     size_t junk_begin = 0;
     for (size_t remaining = str.length(); remaining > 0; --remaining) {
         const char ch = str[remaining - 1];
         if (ch == '.') {
             isPeriod = true;
         } else if (ch == '~') {
             isTilde = true;
         } else if (ch > ' '  &&  ch != ';'  &&  ch != ',') {
             // found non-junk character.  First junk character is just after this
             junk_begin = remaining;
             break;
         }
     }

     // A ';' at the start of the junk that terminates an HTML escape char
     // like "&bgr;" is not junk: step over it.
     if (junk_begin < str.length() && str[junk_begin] == ';') {
         // we assume no HTML escape char will be longer than this
         static const int kMaxCharsToLookAt = 20;

         // go backwards, looking for the ampersand
         for (int back = ((int)junk_begin - 1);
              back >= 0 && ((junk_begin - back) < kMaxCharsToLookAt);
              --back) {
             const unsigned char ch = str[back];
             if (ch == '&') {
                 // The semicolon ends an HTML escape character, so we keep it
                 ++junk_begin;
                 break;
             }
             if (!isalnum(ch) && ch != '#') {
                 // The semicolon does NOT end an HTML escape character
                 break;
             }
         }
     }

     // Work out what, if anything, is appended in place of the junk.
     CTempString suffix; // by default, just remove junk
     const size_t total = str.length();
     if (junk_begin < total) {
         const int chars_in_junk = (int)(total - junk_begin);
         _ASSERT(chars_in_junk >= 1);

         if (isPeriod) {
             // one period is allowed at the end, or an ellipsis if permitted
             const bool putEllipsis = (allow_ellipsis && (chars_in_junk >= 3) &&
                 str[junk_begin + 1] == '.' && str[junk_begin + 2] == '.');
             suffix = (putEllipsis ? "..." : ".");
         } else if (isTilde && str[junk_begin] == '~') {
             // tilde(s) are allowed, single or double, because we don't know
             // whether or not tilde-expansion was called before this point
             const bool doubleTilde = ((chars_in_junk >= 2) && str[junk_begin + 1] == '~');
             suffix = (doubleTilde ? "~~" : "~");
         }
     }

     // Skip the leading whitespace of the part we keep, then assemble.
     const char* first = str.data();
     size_t kept = junk_begin;
     for (; kept > 0 && *first <= ' '; ++first) {
         --kept;
     }
     result.reserve(kept + suffix.length());
     result.assign(first, kept);
     result.append(suffix.data(), suffix.length());
 }


 // Two-character combinations that CleanAndCompress() looks for.
 // twochars(a,b) packs the pair into one Uint2: `a` (the earlier character)
 // in the high byte and `b` (the later one) in the low byte, so the values
 // below can be used as case labels in a switch.

 #define twochars(a,b) Uint2((a) << 8 | (b))

 #define twocommas twochars(',',',')

 #define twospaces twochars(' ',' ')

 #define twosemicolons twochars(';',';')

 #define space_comma twochars(' ',',')

 #define space_bracket twochars(' ',')')

 #define bracket_space twochars('(',' ')

 #define space_semicolon twochars(' ',';')

 #define comma_space twochars(',',' ')

 #define semicolon_space twochars(';',' ')


 // Copies instr into dest while tidying up punctuation and blanks.
 //
 // Leading and trailing spaces of instr are ignored.  The remaining text is
 // scanned one character at a time together with its predecessor, and the
 // following pairs are rewritten:
 //   ",,"   -> ", "
 //   "  "   -> only the last space of a run is kept
 //   ";;"   -> only the last semicolon of a run is kept
 //   "( "   -> "("           (space after opening bracket dropped)
 //   " )"   -> ")"           (space before closing bracket dropped)
 //   " ,"   -> ", "  and any spaces/commas that follow are skipped
 //   " ;"   -> "; "  and any spaces/semicolons that follow are skipped
 //   ", "   -> ", "  and any spaces/commas that follow are skipped
 //   "; "   -> "; "  and any spaces/semicolons that follow are skipped
 // The last pending character is written unless it is a space.
 //
 // dest is first sized to the trimmed input length, filled directly, and
 // finally shrunk to the number of characters actually written.
 void CleanAndCompress(string& dest, const CTempString& instr)
 {
     size_t left = instr.size();
     // this is the input stream
     const char* in = instr.data();

     // skip front white spaces
     while (left && *in == ' ')
     {
         in++;
         left--;
     }
     // forget end white spaces
     while (left && in[left - 1] == ' ')
     {
         left--;
     }

     dest.resize(left);

     if (left < 1) return;

     // this is where we write result; dest is non-empty here, so &dest[0] is
     // a valid writable pointer (writing through c_str() is not allowed)
     char* const out_begin = &dest[0];
     char* out = out_begin;

     char curr = *in++; // initialize with first character
     left--;

     char next = 0;
     Uint2 two_chars = curr; // this is two bytes storage where we see current and previous symbols

     // Inside the main loop `left` still counts the character that was just
     // read, i.e. it is one more than the number of unread characters.
     // This helper skips over spaces and `separator` characters without ever
     // reading past the end of the input: once the input is exhausted it
     // yields '\0', which the code after the loop treats as "nothing pending".
     auto skip_run = [&in, &left](char ch, char separator) -> char {
         while ((ch == ' ' || ch == separator) && left > 0) {
             ch = (left > 1) ? *in++ : '\0';
             left--;
         }
         return ch;
     };

     while (left > 0) {
         next = *in++;

         two_chars = Uint2((two_chars << 8) | next);

         switch (two_chars)
         {
         case twocommas: // replace double commas with comma+space
             *out++ = curr;
             next = ' ';
             break;
         case twospaces: // skip multiple spaces (only print last one)
             break;
         case twosemicolons: // skip multiple semicolons (only print last one)
             break;
         case bracket_space: // skip space after bracket
             next = curr;
             two_chars = curr;
             break;
         case space_bracket: // skip space before bracket
             break;
         case space_comma: // swap to comma+space, then swallow what follows
             *out++ = next;
             next = curr;
             *out++ = ' ';
             next = skip_run(next, ',');
             two_chars = next;
             break;
         case space_semicolon: // swap to semicolon+space, then swallow what follows
             *out++ = next;
             next = curr;
             *out++ = ' ';
             next = skip_run(next, ';');
             two_chars = next;
             break;
         case comma_space: // keep comma+space, swallow further spaces/commas
             *out++ = curr;
             *out++ = ' ';
             next = skip_run(next, ',');
             two_chars = next;
             break;
         case semicolon_space: // keep semicolon+space, swallow further spaces/semicolons
             *out++ = curr;
             *out++ = ' ';
             next = skip_run(next, ';');
             two_chars = next;
             break;
         default:
             *out++ = curr;
             break;
         }

         curr = next;
         if (left > 0) {
             left--;
         }
     }

     // Flush the pending character.  Compare against '\0' rather than "> 0"
     // so that a final byte >= 0x80 is not lost where char is signed.
     if (curr != '\0' && curr != ' ') {
         *out++ = curr;
     }

     dest.resize(out - out_begin);
 }


 #if 0

 // Ad-hoc driver for CleanAndCompress(); compiled out by the enclosing #if 0.
 // Constructing an instance runs CleanAndCompress() over a few sample inputs
 // and prints each input together with the produced output to cout.
 struct CleanAndCompress_unit_test

 {

     // Runs all sample inputs.
     CleanAndCompress_unit_test()

     {

         test("C( )C");

         test("xx,,xx");

         test("xx,, xx");

         test("xx,,  xx");

         test("  xx  xx  ");

         test("xx , xx");

         test("xx  , xx");

         test("xx(xx)");

         test("xx( xx )");

     }

     // Cleans `s` and prints "<input>---><output>." (the trailing '.' makes
     // trailing blanks in the output visible).
     void test(char* s)

     {

         string str;

         CleanAndCompress(str, s);

         cout << s << "--->" << str << '.' << endl;

     }

 };


 // Global instance: its constructor would run the samples during static
 // initialization if this section were enabled.
 CleanAndCompress_unit_test t;

 #endif


 /*

 void CleanAndCompress (string& str)

 {

     if (str.empty()) {

         return;

     }


     size_t pos = str.find (" ,");

     if (pos != NPOS) {

         str [pos] = ',';

         str [pos+1] = ' ';

     }

     pos = str.find (",,");

     if (pos != NPOS) {

         str [pos+1] = ' ';

     }

     pos = str.find (" ;");

     if (pos != NPOS) {

         str [pos] = ';';

         str [pos+1] = ' ';

     }

     pos = str.find ("( ");

     if (pos != NPOS) {

         str [pos] = ' ';

         str [pos+1] = '(';

     }

     pos = str.find (" )");

     if (pos != NPOS) {

         str [pos] = ')';

         str [pos+1] = ' ';

     }


     string::iterator end = str.end();

     string::iterator it = str.begin();

     string::iterator new_str = it;

     while (it != end) {

         *new_str++ = *it;

         if ( (*it == ' ')  ||  (*it == '\t')  ||  (*it == '(') ) {

             for (++it; (it != end) && (*it == ' ' || *it == '\t'); ++it) continue;

             if ((it != end) && (*it == ')' || *it == ',') ) {

                 // this "if" protects against the case "(...bunch of spaces and tabs...)".

                 // Otherwise, the first '(' is unintentionally erased

                 if( *(new_str - 1) != '(' ) {

                     --new_str;

                 }

             }

         } else {

             ++it;

         }

     }

     str.erase(new_str, str.end());

 }

 */


 #if 0
 // Ad-hoc exerciser for both TrimSpacesAndJunkFromEnds() overloads; compiled
 // out in normal builds.  When enabled, the file-scope object `c` below runs
 // every case during static initialization.  Nothing is checked or printed;
 // it only provides something to step through in a debugger.
 struct CJunkUnitTest
 {
     CJunkUnitTest()
     {
         static const struct {
             const char* text;
             bool        flag;
         } kCases[] = {
             { " .",              true  },
             { " aaa bbb.....",   true  },
             { " aaa bbb.....",   false },
             { " aaa bbb~~~~~",   true  },
             { " aaa bbb,,,,,",   true  },
             { " aaa bbb;;;;;;",  true  }
         };
         for (size_t i = 0;  i < sizeof(kCases) / sizeof(kCases[0]);  ++i) {
             test(kCases[i].text, kCases[i].flag);
         }
     }

     // Feed one input through the in-place overload, then through the
     // (result, source) overload.
     void test(CTempString v, bool a_e)
     {
         string trimmed(v);
         TrimSpacesAndJunkFromEnds(trimmed, a_e);
         TrimSpacesAndJunkFromEnds(trimmed, v, a_e);
     }
 };

 static CJunkUnitTest c;
 #endif


 // Decide whether a match starting at offset `pos` of `str` begins a word.
 //
 // Only the character to the LEFT of pos is examined: it must be whitespace
 // or punctuation.  This deliberately mirrors a long-standing C toolkit quirk
 // (the right-hand boundary was never checked) that has become the expected
 // behavior.  A pos of 0, or one beyond str.size(), has no left neighbour to
 // inspect and is reported as a word start.
 static bool s_IsWholeWord(const string& str, size_t pos)
 {
     if (pos == 0  ||  pos > str.size()) {
         return true;
     }
     const unsigned char left = (unsigned char) str[pos - 1];
     return isspace(left)  ||  ispunct(left);
 }


 // Append `str` to `to`, separated by `prefix`.
 //
 //  - An empty `str` leaves `to` untouched.
 //  - If `to` is empty, `str` is copied in without any prefix.
 //  - With noRedundancy set, nothing is appended when `str` already occurs in
 //    `to` at a position that s_IsWholeWord() accepts as a word start.
 //  - If `prefix` starts with ';' and `to` already ends with ';', the leading
 //    ';' of the prefix is dropped so the separator is not doubled.
 void JoinString(string& to, const string& prefix, const string& str, bool noRedundancy)
 {
     if ( str.empty() ) {
         return;
     }
     if ( to.empty() ) {
         to += str;
         return;
     }

     if (noRedundancy) {
         // Step through every occurrence (advancing one character at a time
         // so overlapping matches are seen) until one starts a word.
         //for ( pos = NStr::Find(to, str); pos != NPOS; pos += str.length()) {
         size_t hit = NStr::Find(to, str);
         while (hit != NPOS) {
             if (s_IsWholeWord(to, hit)) {
                 return;
             }
             hit = NStr::Find(to, str, hit + 1);
         }
     }

     //LOG_POST(Error << "adding: to=" << to << "  prefix=" << prefix << "  str=" << str);

     const bool semicolon_doubled =
         NStr::StartsWith(prefix, ";")  &&  NStr::EndsWith(to, ";");
     if (semicolon_doubled) {
         to += prefix.substr(1);
     } else {
         to += prefix;
     }
     to += str;
 }


 // Join all strings of `l` into one, separated by `delim`.
 //
 // The first element seeds the result; each following element is added with
 // the JoinString(to, prefix, str, noRedundancy) overload, so its rules for
 // empty pieces, redundancy and doubled ';' apply.  An empty list yields
 // kEmptyStr.
 string JoinString(const list<string>& l, const string& delim, bool noRedundancy)
 {
     if ( l.empty() ) {
         return kEmptyStr;
     }

     /**
     string result;
     set<CTempString> strings;
     ITERATE (list<string>, it, l) {
         if ( !noRedundancy  ||
              strings.insert(CTempString(*it)).second) {
             if ( !result.empty() ) {
                 result += delim;
             }
             result += *it;
         }
     }
     **/

     list<string>::const_iterator piece = l.begin();
     string joined = *piece;
     for (++piece;  piece != l.end();  ++piece) {
         JoinString(joined, delim, *piece, noRedundancy);
     }
     return joined;
 }


 /*

 // Validate the correct format of an accession string.

 static bool s_IsValidAccession(const string& acc)

 {

     static const size_t kMaxAccLength = 16;


     if ( acc.empty() ) {

         return false;

     }


     if ( acc.length() >= kMaxAccLength ) {

         return false;

     }


     // first character must be uppercase letter

     if ( !(isalpha((unsigned char) acc[0])  &&  isupper((unsigned char) acc[0])) ) {

         return false;

     }


     size_t num_alpha   = 0,

            num_undersc = 0,

            num_digits  = 0;


     const char* ptr = acc.c_str();

     if ( NStr::StartsWith(acc, "NZ_") ) {

         ptr += 3;

     }

     for ( ; isalpha((unsigned char)(*ptr)); ++ptr, ++num_alpha );

     for ( ; *ptr == '_'; ++ptr, ++num_undersc );

     for ( ; isdigit((unsigned char)(*ptr)); ++ptr, ++num_digits );


     if ( (*ptr != '\0')  &&  (*ptr != ' ')  &&  (*ptr != '.') ) {

         return false;

     }


     switch ( num_undersc ) {

     case 0:

         {{

             if ( (num_alpha == 1  &&  num_digits == 5)  ||

                  (num_alpha == 2  &&  num_digits == 6)  ||

                  (num_alpha == 3  &&  num_digits == 5)  ||

                  (num_alpha == 4  &&  num_digits == 8)  ||

                  (num_alpha == 4  &&  num_digits == 9) ) {

                 return true;

             }

         }}

         break;


     case 1:

         {{

             if( num_alpha == 3 && num_digits == 6 &&

                 NStr::StartsWith(acc, "MAP_") )

             {

                 return true;

             }


             // RefSeq accession

             if ( (num_alpha != 2)  ||

                  (num_digits != 6  &&  num_digits != 8  &&  num_digits != 9) ) {

                 return false;

             }


             char first_letter = acc[0];

             char second_letter = acc[1];


             if ( first_letter == 'N' ) {

                 if ( second_letter == 'C'  ||  second_letter == 'G'  ||

                      second_letter == 'M'  ||  second_letter == 'R'  ||

                      second_letter == 'P'  ||  second_letter == 'W'  ||

                      second_letter == 'T' ) {

                     return true;

                 }

             } else if ( first_letter == 'X' ) {

                 if ( second_letter == 'M'  ||  second_letter == 'R'  ||

                      second_letter == 'P' ) {

                     return true;

                 }

             } else if ( first_letter == 'Z'  ||  first_letter == 'A'  ||

                         first_letter == 'Y' ) {

                 return (second_letter == 'P');

             } else if ( first_letter == 'W' ) {

                 if ( second_letter == 'P' ) {

                     return true;

                 }

             }

         }}

         break;


     default:

         return false;

     }


     return false;

 }

 */


 // Check that `accn` carries a ".version" suffix: everything after the FIRST
 // '.' must be one or more decimal digits, running to the end of the string.
 // The part before the '.' is not examined here.
 static bool s_IsValidDotVersion(const string& accn)
 {
     const size_t dot = accn.find('.');
     if (dot == string::npos) {
         return false;
     }

     // Skip over the run of digits that follows the dot.
     const size_t first_digit = dot + 1;
     size_t scan = first_digit;
     while (scan < accn.size()  &&  isdigit((unsigned char) accn[scan])) {
         ++scan;
     }

     // Valid only if the digits reach the end and there is at least one.
     return scan == accn.size()  &&  scan > first_digit;
 }


 // Report whether `accn` is an accession that CSeq_id::IdentifyAccession()
 // recognises (anything other than eAcc_unknown).  When `flag` is
 // eValidateAccDotVer the string must additionally pass
 // s_IsValidDotVersion().
 //
 // The older hand-written s_IsValidAccession() check is no longer used.
 bool IsValidAccession(const string& accn, EAccValFlag flag)
 {
     // bool valid = s_IsValidAccession(accn);
     if (CSeq_id::IdentifyAccession(accn) == CSeq_id::eAcc_unknown) {
         return false;
     }
     if (flag == eValidateAccDotVer) {
         return s_IsValidDotVersion(accn);
     }
     return true;
 }


 // Format `date` with the pattern selected by `format_choice`, upper-case the
 // result and APPEND it to `str` (existing contents of `str` are kept).
 //
 //   eDateToString_cit_sub -> cit-sub pattern
 //   eDateToString_patent  -> patent pattern
 //   anything else         -> regular pattern
 void DateToString(const CDate& date, string& str, EDateToString format_choice )
 {
     // One day we should make regular format default to JAN, since "JUN" seems
     // kind of arbitrary.
     static const char* regular_format = "%{%2D%|01%}-%{%3N%|JUN%}-%Y";
     static const char* cit_sub_format = "%{%2D%|??%}-%{%3N%|???%}-%{%4Y%|/???%}";
     static const char* patent_format  = "%{%2D%|01%}-%{%3N%|JAN%}-%Y";

     // Kept as a plain `const char*` so the same CDate::GetDate overload is
     // selected as before.
     const char* format = regular_format;
     if (format_choice == eDateToString_cit_sub) {
         format = cit_sub_format;
     } else if (format_choice == eDateToString_patent) {
         format = patent_format;
     }

     string formatted;
     date.GetDate(&formatted, format);
     NStr::ToUpper(formatted);
     str.append(formatted);
 }


 // Build a summary of the delta-ext segments of a delta sequence.
 //
 // seq      sequence to inspect.
 // summary  receives the result.  It is overwritten only when `seq` has a
 //          delta representation with a delta extension; otherwise the
 //          function returns without touching it.
 //
 // The summary carries the segment count, the gap counters, the residue total
 // and a text listing with one '~'-terminated line per contig or gap.
 // The counters are accumulated in a default-constructed SDeltaSeqSummary;
 // presumably its constructor zeroes them -- confirm against its definition.
 void GetDeltaSeqSummary(const CBioseq_Handle& seq, SDeltaSeqSummary& summary)

 {

     // Bail out unless this is a delta sequence that actually has a delta ext.
     if ( !seq.IsSetInst()                                ||

          !seq.IsSetInst_Repr()                           ||

          !(seq.GetInst_Repr() == CSeq_inst::eRepr_delta) ||

          !seq.IsSetInst_Ext()                            ||

          !seq.GetInst_Ext().IsDelta() ) {

         return;

     }


     // Work on a local copy so `summary` is replaced in one step at the end.
     SDeltaSeqSummary temp;

     CScope& scope = seq.GetScope();


     const CDelta_ext::Tdata& segs = seq.GetInst_Ext().GetDelta().Get();

     temp.num_segs = segs.size();


     // Running total of the lengths seen so far (also the 1-based end
     // coordinate of the segment just processed).
     size_t len = 0;


     CNcbiOstrstream text;


     CDelta_ext::Tdata::const_iterator curr = segs.begin();

     CDelta_ext::Tdata::const_iterator end = segs.end();

     CDelta_ext::Tdata::const_iterator next;

     // `next` is advanced explicitly so the literal case below can swallow
     // several consecutive segments in one iteration.
     for ( ; curr != end; curr = next ) {

         {{

             // set next to one after curr

             next = curr; ++next;

         }}

         // 1-based start coordinate of the current segment.
         size_t from = len + 1;

         switch ( (*curr)->Which() ) {

         case CDelta_seq::e_Loc:

             {{

                 const CDelta_seq::TLoc& loc = (*curr)->GetLoc();

                 if ( loc.IsNull() ) {  // gap

                     ++temp.num_gaps;

                     // NOTE(review): unlike every other line written below,
                     // this one uses no setw(8) and no ':' separator --
                     // confirm that the different layout is intended.
                     text << "* " << from << ' ' << len

                          << " gap of unknown length~";

                 } else {  // count length

                     size_t tlen = sequence::GetLength(loc, &scope);

                     len += tlen;

                     temp.residues += tlen;

                     text << "* " << setw(8) << from << ' ' << setw(8) << len

                          << ": contig of " << tlen << " bp in length~";

                 }

             }}

             break;

         case CDelta_seq::e_Literal:

             {{

                 const CDelta_seq::TLiteral& lit = (*curr)->GetLiteral();

                 // A literal without a retrievable length counts as 0.
                 size_t lit_len = lit.CanGetLength() ? lit.GetLength() : 0;

                 len += lit_len;

                 // Literal with sequence data that is not a gap: a contig.
                 if ( lit.CanGetSeq_data() && lit.GetSeq_data().Which() != CSeq_data::e_Gap ) {

                     temp.residues += lit_len;

                     // Merge the run of directly following non-gap literals
                     // into this contig; advancing `next` makes the outer
                     // loop skip them.
                     while ( next != end  &&  (*next)->IsLiteral()  &&

                         (*next)->GetLiteral().CanGetSeq_data()  &&

                         (*next)->GetLiteral().GetSeq_data().Which() != CSeq_data::e_Gap ) {

                         const CDelta_seq::TLiteral& next_lit = (*next)->GetLiteral();

                         size_t next_len = next_lit.CanGetLength() ?

                             next_lit.GetLength() : 0;

                         lit_len += next_len;

                         len += next_len;

                         temp.residues += next_len;

                         ++next;

                     }

                     text << "* " << setw(8) << from << ' ' << setw(8) << len

                          << ": contig of " << lit_len << " bp in length~";

                 } else {

                     // Literal without sequence data, or with gap data: a gap.
                     bool unk = false;

                     ++temp.num_gaps;

                     if ( lit.CanGetFuzz() ) {

                         const CSeq_literal::TFuzz& fuzz = lit.GetFuzz();

                         // A fuzz limit of "unk" marks a gap of unknown length.
                         if ( fuzz.IsLim()  &&

                              fuzz.GetLim() == CInt_fuzz::eLim_unk ) {

                             unk = true;

                             ++temp.num_faked_gaps;

                             // from > len happens when the gap contributed no
                             // length; the coordinates are then left blank.
                             if ( from > len ) {

                                 text << "*                    gap of unknown length~";

                             } else {

                                 text << "* " << setw(8) << from << ' ' << setw(8) << len

                                      << ": gap of unknown length~";

                             }

                         }

                     }

                     // Gap of known size.
                     if ( !unk ) {

                         text << "* " << setw(8) << from << " " << setw(8) << len

                              << ": gap of " << lit_len << " bp~";

                     }

                 }

             }}

             break;


         default:

             break;

         }

     }

     // Publish the counters, then attach the accumulated text.
     summary = temp;

     summary.text = CNcbiOstrstreamToString(text);

 }


 // Display strings for the CMolInfo technique values that GetTechString()
 // knows how to describe.  Each constant is read through its .Get() accessor.
 SAFE_CONST_STATIC_STRING(kTS_concept_trans,    "conceptual translation");

 SAFE_CONST_STATIC_STRING(kTS_concept_trans_a,  "conceptual translation supplied by author");

 SAFE_CONST_STATIC_STRING(kTS_both,             "conceptual translation with partial peptide sequencing");

 SAFE_CONST_STATIC_STRING(kTS_seq_pept,         "direct peptide sequencing");

 SAFE_CONST_STATIC_STRING(kTS_seq_pept_homol,   "sequenced peptide, ordered by homology");

 SAFE_CONST_STATIC_STRING(kTS_seq_pept_overlap, "sequenced peptide, ordered by overlap");


 // Map a CMolInfo technique code to its display string.
 //
 // Only the six peptide/translation techniques have a description; every
 // other value yields kEmptyStr.  The returned reference points to a static
 // string.
 const string& GetTechString(int tech)
 {
     switch ( tech ) {
     case CMolInfo::eTech_concept_trans:     return kTS_concept_trans.Get();
     case CMolInfo::eTech_concept_trans_a:   return kTS_concept_trans_a.Get();
     case CMolInfo::eTech_both:              return kTS_both.Get();
     case CMolInfo::eTech_seq_pept:          return kTS_seq_pept.Get();
     case CMolInfo::eTech_seq_pept_homol:    return kTS_seq_pept_homol.Get();
     case CMolInfo::eTech_seq_pept_overlap:  return kTS_seq_pept_overlap.Get();
     default:
         break;
     }

     // No description for this technique.
     return kEmptyStr;
 }


 // True when `uo` is itself a ModelEvidence user object, i.e. its type is a
 // string equal to "ModelEvidence".
 bool s_IsModelEvidanceUop(const CUser_object& uo)
 {
     if ( !uo.CanGetType()  ||  !uo.GetType().IsStr() ) {
         return false;
     }
     return uo.GetType().GetStr() == "ModelEvidence";
 }


 // Depth-first search for a ModelEvidence user object.
 //
 // Returns `&uo` if `uo` itself qualifies; otherwise descends, in field order,
 // into every field holding a nested user object (or a list of them) and
 // returns the first match.  Returns 0 when nothing is found.  The returned
 // pointer refers into the object tree rooted at `uo`.
 const CUser_object* s_FindModelEvidanceUop(const CUser_object& uo)
 {
     if ( s_IsModelEvidanceUop(uo) ) {
         return &uo;
     }

     ITERATE (CUser_object::TData, fld_it, uo.GetData()) {
         const CUser_field& fld = **fld_it;
         if ( !fld.CanGetData() ) {
             continue;
         }
         const CUser_field::TData& payload = fld.GetData();

         if ( payload.Which() == CUser_field::TData::e_Object ) {
             // Single nested object.
             const CUser_object* hit = s_FindModelEvidanceUop(payload.GetObject());
             if ( hit != 0 ) {
                 return hit;
             }
         } else if ( payload.Which() == CUser_field::TData::e_Objects ) {
             // List of nested objects: first match wins.
             ITERATE (CUser_field::TData::TObjects, obj_it, payload.GetObjects()) {
                 const CUser_object* hit = s_FindModelEvidanceUop(**obj_it);
                 if ( hit != 0 ) {
                     return hit;
                 }
             }
         }
         // Any other kind of field data cannot contain a user object.
     }

     return 0;
 }


 // Collect model-evidence details for `bsh` from its User descriptors.
 //
 // Every User descriptor is searched (via s_FindModelEvidanceUop) for a
 // "ModelEvidence" object.  For each one found, the recognised fields are
 // copied into `me`:
 //
 //   "Contig Name"  (string)        -> me.name
 //   "Assembly"     (fields)        -> each nested "accession" string is
 //                                     appended to me.assembly
 //   "Method"       (string)        -> me.method
 //   "Counts"       (has "mRNA"/"EST" sub-field), or top-level "mRNA"/"EST"
 //                                  -> me.mrnaEv / me.estEv set to true
 //   "Contig Gi"    (int)           -> me.gi
 //   "Contig Span"  (exactly 2 ints)-> me.span
 //
 // Fields that are missing or have an unexpected type are skipped; members of
 // `me` that no field touches keep whatever value the caller passed in.  If
 // several descriptors carry model evidence, later ones overwrite the scalar
 // members and add to me.assembly.
 //
 // Finally, if me.gi is set but me.name has no ".version", the name is
 // replaced by the accession.version that the scope reports for that GI.
 //
 // Returns true if at least one ModelEvidence object was found.
 bool s_GetModelEvidance(const CBioseq_Handle& bsh, SModelEvidance& me)
 {
     CConstRef<CUser_object> moduop;
     bool result = false;

     for (CSeqdesc_CI it(bsh, CSeqdesc::e_User);  it;  ++it) {
         moduop.Reset(s_FindModelEvidanceUop(it->GetUser()));
         if ( !moduop.NotEmpty() ) {
             continue;
         }
         result = true;

         CConstRef<CUser_field> ufp;

         if ( moduop->HasField("Contig Name") ) {
             ufp = &(moduop->GetField("Contig Name"));
             if ( ufp.NotEmpty()  &&  ufp->IsSetData()  &&  ufp->GetData().IsStr() ) {
                 me.name = ufp->GetData().GetStr();
             }
         }

         if ( moduop->HasField("Assembly") ) {
             ufp = &(moduop->GetField("Assembly"));
             if ( ufp.NotEmpty()  &&  ufp->IsSetData()  &&  ufp->GetData().IsFields() ) {
                 ITERATE(CUser_field::C_Data::TFields, fld_itr, ufp->GetData().GetFields()) {
                     const CUser_field& field = **fld_itr;
                     // Each assembly entry is expected to be a field list.
                     // Skip entries of any other type instead of letting
                     // GetFields() fail on them, in line with the type
                     // checks applied to every other field in this function.
                     if ( !field.IsSetData()  ||  !field.GetData().IsFields() ) {
                         continue;
                     }
                     ITERATE(CUser_field::C_Data::TFields, inr_itr, field.GetData().GetFields()) {
                         const CUser_field& ufld = **inr_itr;
                         if ( !ufld.IsSetLabel()  ||  !ufld.GetLabel().IsStr() ) continue;
                         const string& label = ufld.GetLabel().GetStr();
                         if (label != "accession") continue;
                         const CUser_field::C_Data& data = ufld.GetData();
                         if (data.IsStr()) {
                             const string& accn = data.GetStr();
                             me.assembly.push_back(accn);
                         }
                     }
                 }
             }
         }

         if ( moduop->HasField("Method") ) {
             ufp = &(moduop->GetField("Method"));
             if ( ufp.NotEmpty()  &&  ufp->IsSetData()  &&  ufp->GetData().IsStr() ) {
                 me.method = ufp->GetData().GetStr();
             }
         }

         // Evidence flags: either nested under "Counts" ...
         if ( moduop->HasField("Counts") ) {
             ufp = &(moduop->GetField("Counts"));
             if ( ufp->HasField("mRNA")) {
                 me.mrnaEv = true;
             }
             if ( ufp->HasField("EST")) {
                 me.estEv = true;
             }
         }
         // ... or present directly on the ModelEvidence object.
         if ( moduop->HasField("mRNA") ) {
             me.mrnaEv = true;
         }
         if ( moduop->HasField("EST") ) {
             me.estEv = true;
         }

         if ( moduop->HasField("Contig Gi") ) {
             ufp = &(moduop->GetField("Contig Gi"));
             if ( ufp.NotEmpty()  &&  ufp->IsSetData()  &&  ufp->GetData().IsInt() ) {
                 me.gi = GI_FROM(CUser_field::C_Data::TInt, ufp->GetData().GetInt());
             }
         }

         if ( moduop->HasField("Contig Span") ) {
             ufp = &(moduop->GetField("Contig Span"));
             // Both the declared count and the actual list size must be 2.
             if ( ufp.NotEmpty()  &&  ufp->IsSetData()  &&  ufp->GetData().IsInts()
                 && ufp->IsSetNum() && ufp->GetNum() == 2 && ufp->GetData().GetInts().size() == 2 )
             {
                 const CUser_field::C_Data::TInts & int_list = ufp->GetData().GetInts();
                 me.span.first  = int_list[0];
                 me.span.second = int_list[1];
             }
         }
     }

     // if me.name is missing version, try to update from me.gi
     if( me.gi > ZERO_GI && me.name.find('.') == string::npos ) {
         CSeq_id_Handle accver_idh = bsh.GetScope().GetAccVer( CSeq_id_Handle::GetGiHandle(me.gi) );
         if( accver_idh ) {
             CConstRef<CSeq_id> accver_seq_id = accver_idh.GetSeqIdOrNull();
             if( accver_seq_id ) {
                 const CTextseq_id *text_id = accver_seq_id->GetTextseq_Id();
                 if( text_id && text_id->IsSetAccession() && text_id->IsSetVersion() ) {
                     me.name = text_id->GetAccession() + "." + NStr::IntToString(text_id->GetVersion());
                 }
             }
         }
     }

     return result;
 }


 // Fill `me` with the model evidence of `bsh`.
 //
 // The sequence's own descriptors are tried first.  If they hold no model
 // evidence and `bsh` is a protein, the nucleotide parent reported by
 // sequence::GetNucleotideParent() is tried instead.  Returns true if model
 // evidence was found on either.
 bool GetModelEvidance(const CBioseq_Handle& bsh, SModelEvidance& me)
 {
     if ( s_GetModelEvidance(bsh, me) ) {
         return true;
     }

     // Only proteins have a nucleotide parent to fall back on.
     if ( !CSeq_inst::IsAa(bsh.GetInst_Mol()) ) {
         return false;
     }

     CBioseq_Handle nuc = sequence::GetNucleotideParent(bsh);
     if ( nuc ) {
         return s_GetModelEvidance(nuc, me);
     }
     return false;
 }


 // Three-letter amino-acid names in Ncbistdaa order: GetAAName() indexes this
 // table directly with an Ncbistdaa code.  The table mixes three-letter
 // abbreviations with the special entries "---", "OTHER" and "TERM".

 static const char* kAANames[] = {

     "---", "Ala", "Asx", "Cys", "Asp", "Glu", "Phe", "Gly", "His", "Ile",

     "Lys", "Leu", "Met", "Asn", "Pro", "Gln", "Arg", "Ser", "Thr", "Val",

     "Trp", "OTHER", "Tyr", "Glx", "Sec", "TERM", "Pyl", "Xle"

 };


 // Return the kAANames entry for an amino acid.
 //
 // aa        the residue: an Ncbistdaa code, or -- when is_ascii is set -- an
 //           Ncbieaa character that is first mapped to its Ncbistdaa index.
 // is_ascii  selects the interpretation of `aa`.
 //
 // Codes beyond the end of the table yield "OTHER".  The returned pointer
 // refers to a string literal.
 const char* GetAAName(unsigned char aa, bool is_ascii)
 {
     const size_t name_count = sizeof(kAANames) / sizeof(kAANames[0]);

     if (is_ascii) {
         aa = (unsigned char)
              CSeqportUtil::GetMapToIndex(CSeq_data::e_Ncbieaa,
                                          CSeq_data::e_Ncbistdaa, aa);
     }

     if (aa >= name_count) {
         return "OTHER";
     }
     return kAANames[aa];
 }


 //////////////////////////////////////////////////////////////////////////////


 EResolveOrder GetResolveOrder(CScope& scope,

                               const CSeq_id_Handle& mrna,

                               const CSeq_id_Handle& prot,

                               CBioseq_Handle& mrna_bsh,

                               CBioseq_Handle& prot_bsh)

 {

     EResolveOrder order = eResolve_NotFound;


     if (order == eResolve_NotFound) {

         CRef<CScope> local_scope(new CScope(*CObjectManager::GetInstance()));

         local_scope->AddDefaults();


         CBioseq_Handle possible_mrna = local_scope->GetBioseqHandle(mrna);

         CBioseq_Handle possible_prot;

         if (possible_mrna) {

             possible_prot =

                 possible_mrna.GetTopLevelEntry().GetBioseqHandle(prot);

         }

         if (possible_mrna  &&  possible_prot) {

             order = eResolve_RnaFirst;

         }

     }


     if (order == eResolve_NotFound) {

         CRef<CScope> local_scope(new CScope(*CObjectManager::GetInstance()));

         local_scope->AddDefaults();


         CBioseq_Handle possible_prot = local_scope->GetBioseqHandle(prot);

         CBioseq_Handle possible_mrna;

         if (possible_prot) {

             possible_mrna =

                 possible_prot.GetTopLevelEntry().GetBioseqHandle(mrna);

         }


         if (possible_mrna  &&  possible_prot) {

             order = eResolve_ProtFirst;

         }

     }


     switch (order) {

     case eResolve_NotFound:

         mrna_bsh = CBioseq_Handle();

         prot_bsh = CBioseq_Handle();

         break;


     case eResolve_RnaFirst:

         mrna_bsh = scope.GetBioseqHandle(mrna);

         prot_bsh = scope.GetBioseqHandle(prot);

         break;


     case eResolve_ProtFirst:

         prot_bsh = scope.GetBioseqHandle(prot);

         mrna_bsh = scope.GetBioseqHandle(mrna);

         break;

     }


     return order;

 }


 //////////////////////////////////////////////////////////////////////////////

 // HTML utils and strings


 //  ============================================================================

 //  Link locations:

 //  ============================================================================

 // Exported base URLs for the HTML links this library generates.
 // Presumably callers append the record-specific part (accession, id, query
 // term, ...) to the base -- confirm at the call sites.
 // Note that strLinkBaseNucSearch already contains HTML-escaped ampersands
 // ("&amp;"), and that the two Gene Ontology links use plain http.
 NCBI_XOBJEDIT_EXPORT const char* strLinkBaseNuc =

     "https://www.ncbi.nlm.nih.gov/nuccore/";

 NCBI_XOBJEDIT_EXPORT const char* strLinkBaseProt =

     "https://www.ncbi.nlm.nih.gov/protein/";


 NCBI_XOBJEDIT_EXPORT const char* strLinkBaseEntrezViewer =

     "https://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val="; // https forwarded to http


 NCBI_XOBJEDIT_EXPORT const char* strLinkBaseTaxonomy  =

     "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?";

 NCBI_XOBJEDIT_EXPORT const char* strLinkBaseTransTable =

     "https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c#SG";

 NCBI_XOBJEDIT_EXPORT const char* strLinkBasePubmed =

     "https://www.ncbi.nlm.nih.gov/pubmed/";

 NCBI_XOBJEDIT_EXPORT const char* strLinkBaseExpasy =

     "https://enzyme.expasy.org/EC/"; // not government site

 NCBI_XOBJEDIT_EXPORT const char* strLinkBaseNucSearch =

     "https://www.ncbi.nlm.nih.gov/sites/entrez?db=Nucleotide&amp;cmd=Search&amp;term=";

 NCBI_XOBJEDIT_EXPORT const char* strLinkBaseGenomePrj =

     "https://www.ncbi.nlm.nih.gov/bioproject/";

 NCBI_XOBJEDIT_EXPORT const char* strLinkBaseLatLon =

     "https://www.ncbi.nlm.nih.gov/projects/Sequin/latlonview.html";

 NCBI_XOBJEDIT_EXPORT const char* strLinkBaseGeneOntology =

     "http://amigo.geneontology.org/amigo/term/GO:"; // not government site

 NCBI_XOBJEDIT_EXPORT const char* strLinkBaseGeneOntologyRef =

     "http://www.geneontology.org/cgi-bin/references.cgi#GO_REF:"; // not government site

 NCBI_XOBJEDIT_EXPORT const char* strLinkBaseUSPTO =

     "https://ppubs.uspto.gov/pubwebapp/external.html?q=";

 NCBI_XOBJEDIT_EXPORT const char* strLinkBaseUniProt =

     "https://www.uniprot.org/uniprot/";


 // Link to the documentation of the eukaryotic genome annotation process.
 NCBI_XOBJEDIT_EXPORT const char* strDocLink =

     "https://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/";


 namespace {
     // NOTE: the iterator template parameter is spelled TIter rather than _T.
     // Identifiers that start with an underscore followed by an upper-case
     // letter are reserved for the implementation, and _T in particular is
     // also in use as a macro name.

     // Returns true when the characters starting at `iter` spell out the
     // NUL-terminated `literal` in full without running past `iter_end`.
     // An empty literal always matches.
     //  ============================================================================
     template<typename TIter>
     bool s_StartsWithLiteral(
         TIter iter, // COPY: advanced locally
         const TIter &iter_end,
         const char* literal)
         //  ============================================================================
     {
         for ( ; *literal != '\0'; ++literal, ++iter) {
             if (iter == iter_end  ||  *iter != *literal) {
                 return false;
             }
         }
         return true;
     }

     // make sure we're not "double-sanitizing"
     // (e.g. "&gt;" to "&amp;gt;")
     //
     // `str_iter` must point at an '&'.  Returns false when that '&' already
     // starts one of the entities "&gt;", "&lt;", "&amp;" or "&quot;" (so it
     // must be left alone), true when it is a bare ampersand that needs
     // escaping.  Matching is done by hand; regexes were considered too slow.
     //  ============================================================================
     template<typename TIter>
     bool s_ShouldWeEscapeAmpersand(
         TIter str_iter, // yes, COPY not reference
         const TIter &str_iter_end)
         //  ============================================================================
     {
         _ASSERT(*str_iter == '&');

         // What may follow the '&' in an already-escaped entity.
         static const char* const entity_tails[] = {
             "gt;",
             "lt;",
             "amp;",
             "quot;"
         };
         static const size_t num_entity_tails =
             (sizeof(entity_tails) / sizeof(entity_tails[0]));

         ++str_iter;  // step past the '&'
         for (size_t tail_idx = 0; tail_idx < num_entity_tails; ++tail_idx) {
             if (s_StartsWithLiteral(str_iter, str_iter_end, entity_tails[tail_idx])) {
                 return false;
             }
         }
         return true;
     }

     // see if the '<' opens an HTML tag (currently we
     // only check for a few kinds of tags )
     //
     // Returns true when the text at `str_iter` begins with one of the
     // recognised opening or closing tag prefixes (anchor and acronym).
     //  ============================================================================
     template<typename TIter>
     bool s_IsTagStart(
         const TIter &str_iter,
         const TIter &str_iter_end)
         //  ============================================================================
     {
         static const char* const possible_tag_starts[] = {
             "<a href=",
             "<acronym title",
             "</a>",
             "</acronym"
         };
         static const size_t num_possible_tag_starts =
             (sizeof(possible_tag_starts) / sizeof(possible_tag_starts[0]));

         // we're in a tag if the text matches any one prefix in full
         for (size_t possible_str_idx = 0; possible_str_idx < num_possible_tag_starts; ++possible_str_idx) {
             if (s_StartsWithLiteral(str_iter, str_iter_end, possible_tag_starts[possible_str_idx])) {
                 return true;
             }
         }
         return false;
     }

 }


 // Replace every double quote that lies outside an HTML tag with a single
 // quote, in place.  Returns true if at least one character was changed.
 //
 // Tag detection is a simple heuristic: any '<' enters a tag and any '>'
 // leaves it; quotes seen in between are kept as they are.
 bool ConvertQuotesNotInHTMLTags(string &str)
 {
     bool inside_tag = false;
     bool modified   = false;

     for (string::iterator pos = str.begin();  pos != str.end();  ++pos) {
         const char ch = *pos;
         if (ch == '<') {
             inside_tag = true;
         } else if (ch == '>') {
             inside_tag = false;
         } else if (ch == '"'  &&  !inside_tag) {
             *pos = '\'';
             modified = true;
         }
     }

     return modified;
 }


 //  ============================================================================

 // In-place convenience wrapper: sanitize `str` through the
 // (result, source) overload and store the outcome back into `str`.
 void TryToSanitizeHtml(string &str)
 {
     // Reserve about 10% more than the input.  Data rarely contains many
     // characters that need escaping, so this usually avoids reallocation.
     const int capacity_hint = 1 + (int)((double)str.length() * 1.1);

     string sanitized;
     sanitized.reserve(capacity_hint);
     TryToSanitizeHtml(sanitized, str);

     // swap is faster than assignment
     str.swap(sanitized);
 }


 void TryToSanitizeHtml(std::string &result, const CTempString& str)

 //  ============================================================================

 {

     result.clear();


     // we only sanitize when we're not in an url

     bool in_html_tag = false;

     ITERATE(CTempString, str_iter, str) {

         // see if we're entering an HTML tag

         if (!in_html_tag && *str_iter == '<' && s_IsTagStart(str_iter, str.end())) {

             in_html_tag = true;

         }


         // now that we know whether we're in a tag,

         // process characters appropriately.

         if (in_html_tag) {

             switch (*str_iter) {

             case '&':

                 // make sure we're not "double-sanitizing"

                 // (e.g. "&gt;" to "&amp;gt;")

                 if (s_ShouldWeEscapeAmpersand(str_iter, str.end())) {

                     result += "&amp;";

                 }

                 else {

                     result += '&';

                 }

                 break;

             default:

                 result += *str_iter;

                 break;

             }

         }

         else {

             switch (*str_iter) {

             case '<':

                 result += "&lt;";

                 break;

             case '>':

                 result += "&gt;";

                 break;

             default:

                 result += *str_iter;

                 break;

             }

         }


         // see if we're exiting an HTML tag

         if (in_html_tag && *str_iter == '>') {

             // tag is closed now

             // (Note: does this consider cases where '>' is in quotes?)

             in_html_tag = false;

         }

     }

 }


 void

 TryToSanitizeHtmlList( std::list<std::string> &strs )

 {

     NON_CONST_ITERATE( std::list<std::string>, str_iter, strs ) {

         TryToSanitizeHtml( *str_iter );

     }

 }


 // Report whether `str` contains any of a short list of risky HTML fragments
 // (script/object/applet/embed/form tags, javascript: and vbscript: URLs).
 //
 // The list is not complete, so a false result does not make the text safe;
 // callers still need to take proper precautions.
 //
 // Matching is done with a text state machine that is filled and primed the
 // first time the function finds it unprimed.
 bool
 CommentHasSuspiciousHtml( const string &str )
 {
     static const char* bad_html_strings[] = {
         "<script", "<object", "<applet", "<embed", "<form",
         "javascript:", "vbscript:"
     };

     // Build the matcher on first use.
     static CSafeStatic<CTextFsa> fsa;
     if( ! fsa->IsPrimed() ) {
         size_t word_idx = 0;
         while ( word_idx < ArraySize(bad_html_strings) ) {
             fsa->AddWord( bad_html_strings[word_idx] );
             ++word_idx;
         }
         fsa->Prime();
     }

     // Feed the text through the state machine one character at a time and
     // stop at the first state that signals a match.
     int state = 0;
     for ( string::const_iterator pos = str.begin();  pos != str.end();  ++pos ) {
         state = fsa->GetNextState (state, *pos);
         if (fsa->IsMatchFound (state)) {
             return true;
         }
     }

     return false;
 }


 END_SCOPE(objects)

 END_NCBI_SCOPE

Bioseq.hpp

Date.hpp

Delta_ext.hpp
User-defined methods of the data storage class.

Delta_seq.hpp
User-defined methods of the data storage class.

MolInfo.hpp

Object_id.hpp

Seq_ext.hpp
User-defined methods of the data storage class.

Seq_inst.hpp

Seq_literal.hpp
User-defined methods of the data storage class.

Seq_loc.hpp

User_field.hpp

User_object.hpp

eFlatFileCodes::prot
@ prot

eFlatFileCodes::unk
@ unk

eFlatFileCodes::nuc
@ nuc

_T
#define _T
Definition: bam_coverage_graph_panel.cpp:148

bioseq_handle.hpp

CBioseq_Handle
CBioseq_Handle –.
Definition: bioseq_handle.hpp:91

CConstRef< CUser_object >

CDate
Definition: Date.hpp:53

CDate::GetDate
void GetDate(string *label, bool year_only=false) const
Append a standardized string representation of the date to the label.
Definition: Date.hpp:149

CInt_fuzz
Definition: Int_fuzz.hpp:54

CIsGraph
Definition: objutil.cpp:404

CIsGraph::operator()
bool operator()(const char c)
Definition: objutil.cpp:406

CNcbiOstrstreamToString
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802

CNcbistrstream_Base< IO_PREFIX::ostrstream, IOS_BASE::out >

CRef< CScope >

CSafeStatic
CSafeStatic<>::
Definition: ncbi_safe_static.hpp:448

CScope
CScope –.
Definition: scope.hpp:92

CSeq_id_Handle
Definition: seq_id_handle.hpp:158

CSeq_inst::IsAa
bool IsAa(void) const
Definition: Seq_inst.hpp:113

CSeq_literal
Definition: Seq_literal.hpp:54

CSeq_loc
Definition: Seq_loc.hpp:94

CSeqdesc_CI
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65

CSeqportUtil::GetMapToIndex
static TIndex GetMapToIndex(CSeq_data::E_Choice from_type, CSeq_data::E_Choice to_type, TIndex from_idx)
Definition: seqport_util.cpp:1172

CStaticArraySet
Definition: static_set.hpp:824

CTempString
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65

CTextseq_id
Definition: Textseq_id.hpp:51

CUser_field_Base::C_Data
field contents
Definition: User_field_.hpp:98

CUser_field
Definition: User_field.hpp:51

CUser_field::HasField
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
Definition: User_field.cpp:393

CUser_object
Definition: User_object.hpp:51

CUser_object::HasField
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
Definition: User_object.cpp:138

CUser_object::GetField
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
Definition: User_object.cpp:71

ncbistd.hpp
Include a standard set of the NCBI C++ Toolkit most basic headers.

out
std::ofstream out("events_result.xml")
main entry point for tests

test
#define test(a, b, c, d, e)
Definition: numeric.c:170

next
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56

str
static const char * str(char *buf, int n)
Definition: stats.c:84

data
char data[12]
Definition: iconv.c:80

general_macros.hpp
Utility macros and typedefs for exploring NCBI objects from general.asn.

GI_FROM
#define GI_FROM(T, value)
Definition: ncbimisc.hpp:1086

ArraySize
constexpr size_t ArraySize(const Element(&)[Size])
Definition: ncbimisc.hpp:1532

ITERATE
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815

NON_CONST_ITERATE
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822

ZERO_GI
#define ZERO_GI
Definition: ncbimisc.hpp:1088

string
string
Definition: cgiapp.hpp:690

CSeq_id::IdentifyAccession
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
Definition: Seq_id.cpp:1634

CSeq_id_Handle::GetSeqIdOrNull
CConstRef< CSeq_id > GetSeqIdOrNull(void) const
Definition: seq_id_handle.hpp:315

CSeq_id_Handle::GetGiHandle
static CSeq_id_Handle GetGiHandle(TGi gi)
Faster way to create a handle for a gi.
Definition: seq_id_handle.hpp:188

CSeq_id::GetTextseq_Id
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
Definition: Seq_id.cpp:169

CSeq_id::eAcc_unknown
@ eAcc_unknown
Definition: Seq_id.hpp:322

GetLength
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
Definition: seq_loc_util.cpp:71

GetNucleotideParent
const CBioseq * GetNucleotideParent(const CBioseq &product, CScope *scope)
Get the encoding nucleotide sequence of a protein.
Definition: sequence.cpp:2660

CScope::GetAccVer
CSeq_id_Handle GetAccVer(const CSeq_id_Handle &idh, TGetFlags flags=0)
Get accession.version Seq-id Returns null CSeq_id_Handle if the sequence is not found or if it doesn'...
Definition: scope.cpp:413

CObjectManager::GetInstance
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
Definition: object_manager.cpp:102

CScope::GetBioseqHandle
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95

CScope::AddDefaults
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504

CBioseq_Handle::GetInst_Ext
const TInst_Ext & GetInst_Ext(void) const
Definition: bioseq_handle.cpp:363

CBioseq_Handle::IsSetInst_Ext
bool IsSetInst_Ext(void) const
Definition: bioseq_handle.cpp:351

CBioseq_Handle::GetInst_Mol
TInst_Mol GetInst_Mol(void) const
Definition: bioseq_handle.cpp:243

CSeq_entry_Handle::GetBioseqHandle
CBioseq_Handle GetBioseqHandle(const CSeq_id &id) const
Get Bioseq handle from the TSE of this Seq-entry.
Definition: seq_entry_handle.cpp:159

CBioseq_Handle::IsSetInst
bool IsSetInst(void) const
Definition: bioseq_handle.cpp:195

CBioseq_Handle::IsSetInst_Repr
bool IsSetInst_Repr(void) const
Definition: bioseq_handle.cpp:213

CBioseq_Handle::GetInst_Repr
TInst_Repr GetInst_Repr(void) const
Definition: bioseq_handle.cpp:225

CBioseq_Handle::GetScope
CScope & GetScope(void) const
Get scope this handle belongs to.
Definition: bioseq_handle.hpp:791

CBioseq_Handle::GetTopLevelEntry
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
Definition: bioseq_handle.cpp:539

CConstRef::Reset
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439

CConstRef::NotEmpty
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:1392

Uint2
uint16_t Uint2
2-byte (16-bit) unsigned integer
Definition: ncbitype.h:101

END_NCBI_SCOPE
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103

END_SCOPE
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75

BEGIN_NCBI_SCOPE
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100

BEGIN_SCOPE
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72

SIZE_TYPE
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132

kEmptyStr
#define kEmptyStr
Definition: ncbistr.hpp:123

NStr::EndsWith
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5424

NPOS
#define NPOS
Definition: ncbistr.hpp:133

NStr::IntToString
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078

NStr::Find
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2882

CTempString::data
const char * data(void) const
Return a pointer to the array represented.
Definition: tempstr.hpp:313

NStr::StartsWith
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5406

CTempString::length
size_type length(void) const
Return the length of the represented array.
Definition: tempstr.hpp:320

NStr::ReplaceInPlace
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3396

NStr::ToUpper
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424

CTempString::size
size_type size(void) const
Return the length of the represented array.
Definition: tempstr.hpp:327

NCBI_XOBJEDIT_EXPORT
#define NCBI_XOBJEDIT_EXPORT
Definition: ncbi_export.h:1291

label
static const char label[]
Definition: sec_negotiate_gnutls.h:210

CUser_field_Base::C_Data::GetStr
const TStr & GetStr(void) const
Get the variant data.
Definition: User_field_.hpp:694

CObject_id_Base::IsStr
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291

CUser_object_Base::CanGetType
bool CanGetType(void) const
Check if it is safe to call GetType method.
Definition: User_object_.hpp:299

CInt_fuzz_Base::IsLim
bool IsLim(void) const
Check if variant Lim is selected.
Definition: Int_fuzz_.hpp:636

CUser_field_Base::GetData
const TData & GetData(void) const
Get the Data member data.
Definition: User_field_.hpp:1024

CUser_field_Base::CanGetData
bool CanGetData(void) const
Check if it is safe to call GetData method.
Definition: User_field_.hpp:1018

CUser_field_Base::C_Data::GetFields
const TFields & GetFields(void) const
Get the variant data.
Definition: User_field_.hpp:901

CUser_field_Base::C_Data::TInts
vector< int > TInts
Definition: User_field_.hpp:186

CUser_field_Base::C_Data::TFields
vector< CRef< CUser_field > > TFields
Definition: User_field_.hpp:189

CInt_fuzz_Base::GetLim
TLim GetLim(void) const
Get the variant data.
Definition: Int_fuzz_.hpp:642

CUser_field_Base::C_Data::IsFields
bool IsFields(void) const
Check if variant Fields is selected.
Definition: User_field_.hpp:895

CUser_field_Base::C_Data::IsInt
bool IsInt(void) const
Check if variant Int is selected.
Definition: User_field_.hpp:708

CUser_field_Base::C_Data::IsStr
bool IsStr(void) const
Check if variant Str is selected.
Definition: User_field_.hpp:688

CUser_field_Base::C_Data::TObjects
vector< CRef< CUser_object > > TObjects
Definition: User_field_.hpp:190

CUser_field_Base::IsSetLabel
bool IsSetLabel(void) const
Field label. Check if a value has been assigned to the Label data member.
Definition: User_field_.hpp:935

CObject_id_Base::GetStr
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297

CUser_field_Base::IsSetNum
bool IsSetNum(void) const
Required for strs, ints, reals, oss. Check if a value has been assigned to the Num data member.
Definition: User_field_.hpp:965

CUser_field_Base::C_Data::GetInt
TInt GetInt(void) const
Get the variant data.
Definition: User_field_.hpp:714

CUser_field_Base::C_Data::IsInts
bool IsInts(void) const
Check if variant Ints is selected.
Definition: User_field_.hpp:835

CUser_object_Base::GetData
const TData & GetData(void) const
Get the Data member data.
Definition: User_object_.hpp:335

CUser_field_Base::GetLabel
const TLabel & GetLabel(void) const
Get the Label member data.
Definition: User_field_.hpp:947

CUser_object_Base::GetType
const TType & GetType(void) const
Get the Type member data.
Definition: User_object_.hpp:305

CUser_field_Base::IsSetData
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
Definition: User_field_.hpp:1012

CUser_object_Base::TData
vector< CRef< CUser_field > > TData
Definition: User_object_.hpp:97

CUser_field_Base::C_Data::GetInts
const TInts & GetInts(void) const
Get the variant data.
Definition: User_field_.hpp:841

CUser_field_Base::C_Data::TInt
int TInt
Definition: User_field_.hpp:180

CUser_field_Base::GetNum
TNum GetNum(void) const
Get the Num member data.
Definition: User_field_.hpp:984

CUser_field_Base::C_Data::e_Objects
@ e_Objects
Definition: User_field_.hpp:124

CUser_field_Base::C_Data::e_Object
@ e_Object
for using other definitions
Definition: User_field_.hpp:118

CInt_fuzz_Base::eLim_unk
@ eLim_unk
unknown
Definition: Int_fuzz_.hpp:210

CTextseq_id_Base::IsSetAccession
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
Definition: Textseq_id_.hpp:328

CTextseq_id_Base::GetVersion
TVersion GetVersion(void) const
Get the Version member data.
Definition: Textseq_id_.hpp:441

CSeq_loc_Base::IsNull
bool IsNull(void) const
Check if variant Null is selected.
Definition: Seq_loc_.hpp:504

CTextseq_id_Base::IsSetVersion
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
Definition: Textseq_id_.hpp:422

CTextseq_id_Base::GetAccession
const TAccession & GetAccession(void) const
Get the Accession member data.
Definition: Textseq_id_.hpp:340

CSeq_literal_Base::GetLength
TLength GetLength(void) const
Get the Length member data.
Definition: Seq_literal_.hpp:261

CSeq_literal_Base::CanGetLength
bool CanGetLength(void) const
Check if it is safe to call GetLength method.
Definition: Seq_literal_.hpp:248

CSeq_literal_Base::GetFuzz
const TFuzz & GetFuzz(void) const
Get the Fuzz member data.
Definition: Seq_literal_.hpp:301

CSeq_ext_Base::IsDelta
bool IsDelta(void) const
Check if variant Delta is selected.
Definition: Seq_ext_.hpp:336

CSeq_ext_Base::GetDelta
const TDelta & GetDelta(void) const
Get the variant data.
Definition: Seq_ext_.cpp:180

CSeq_literal_Base::CanGetFuzz
bool CanGetFuzz(void) const
Check if it is safe to call GetFuzz method.
Definition: Seq_literal_.hpp:295

CSeq_literal_Base::CanGetSeq_data
bool CanGetSeq_data(void) const
Check if it is safe to call GetSeq_data method.
Definition: Seq_literal_.hpp:316

CDelta_ext_Base::Get
const Tdata & Get(void) const
Get the member data.
Definition: Delta_ext_.hpp:164

CDelta_ext_Base::Tdata
list< CRef< CDelta_seq > > Tdata
Definition: Delta_ext_.hpp:89

CSeq_literal_Base::GetSeq_data
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
Definition: Seq_literal_.hpp:322

CSeq_data_Base::Which
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_data_.hpp:475

CSeq_inst_Base::eRepr_delta
@ eRepr_delta
sequence made by changes (delta) to others
Definition: Seq_inst_.hpp:100

CMolInfo_Base::eTech_both
@ eTech_both
concept transl. w/ partial pept. seq.
Definition: MolInfo_.hpp:133

CMolInfo_Base::eTech_seq_pept_homol
@ eTech_seq_pept_homol
sequenced peptide, ordered by homology
Definition: MolInfo_.hpp:135

CMolInfo_Base::eTech_seq_pept_overlap
@ eTech_seq_pept_overlap
sequenced peptide, ordered by overlap
Definition: MolInfo_.hpp:134

CMolInfo_Base::eTech_concept_trans
@ eTech_concept_trans
conceptual translation
Definition: MolInfo_.hpp:131

CMolInfo_Base::eTech_seq_pept
@ eTech_seq_pept
peptide was sequenced
Definition: MolInfo_.hpp:132

CMolInfo_Base::eTech_concept_trans_a
@ eTech_concept_trans_a
conceptual transl. supplied by author
Definition: MolInfo_.hpp:136

CSeq_data_Base::e_Gap
@ e_Gap
gap types
Definition: Seq_data_.hpp:114

CSeq_data_Base::e_Ncbieaa
@ e_Ncbieaa
extended ASCII 1 letter aa codes
Definition: Seq_data_.hpp:111

CSeq_data_Base::e_Ncbistdaa
@ e_Ncbistdaa
consecutive codes for std aas
Definition: Seq_data_.hpp:113

CSeqdesc_Base::e_User
@ e_User
user defined object
Definition: Seqdesc_.hpp:124

CDelta_seq_Base::e_Literal
@ e_Literal
a piece of sequence
Definition: Delta_seq_.hpp:90

CDelta_seq_Base::e_Loc
@ e_Loc
point to a sequence
Definition: Delta_seq_.hpp:89

int
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210

i
int i
Definition: lex.newick.cpp:1456

len
int len
Definition: lex.newick.cpp:1450

text
static void text(MDB_val *v)
Definition: mdb_dump.c:62

objects
Definition: wiggle_export_job.hpp:44

t
EIPRangeType t
Definition: ncbi_localip.c:101

ncbi_pch.hpp

isspace
int isspace(Uchar c)
Definition: ncbictype.hpp:69

isalnum
int isalnum(Uchar c)
Definition: ncbictype.hpp:62

isdigit
int isdigit(Uchar c)
Definition: ncbictype.hpp:64

isgraph
int isgraph(Uchar c)
Definition: ncbictype.hpp:65

ispunct
int ispunct(Uchar c)
Definition: ncbictype.hpp:68

max
T max(T x_, T y_)
Definition: njn_function.hpp:105

format
static Format format
Definition: njn_ioutil.cpp:53

in
std::istream & in(std::istream &in_, double &x_)

object_manager.hpp
The Object manager core.

strLinkBaseGeneOntologyRef
const char * strLinkBaseGeneOntologyRef
Definition: objutil.cpp:1660

s_IsModelEvidanceUop
bool s_IsModelEvidanceUop(const CUser_object &uo)
Definition: objutil.cpp:1394

strLinkBaseGenomePrj
const char * strLinkBaseGenomePrj
Definition: objutil.cpp:1654

strLinkBaseProt
const char * strLinkBaseProt
Definition: objutil.cpp:1638

StripSpaces
bool StripSpaces(string &str)
Definition: objutil.cpp:256

strLinkBaseNucSearch
const char * strLinkBaseNucSearch
Definition: objutil.cpp:1652

TryToSanitizeHtml
void TryToSanitizeHtml(string &str)
Definition: objutil.cpp:1803

RemovePeriodFromEnd
bool RemovePeriodFromEnd(string &str, bool keep_ellipsis)
Definition: objutil.cpp:299

SAFE_CONST_STATIC_STRING
SAFE_CONST_STATIC_STRING(kLegalPathChars, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_-.")

strLinkBaseNuc
const char * strLinkBaseNuc
Definition: objutil.cpp:1636

strLinkBaseEntrezViewer
const char * strLinkBaseEntrezViewer
Definition: objutil.cpp:1641

comma_space
#define comma_space
Definition: objutil.cpp:820

strLinkBaseTaxonomy
const char * strLinkBaseTaxonomy
Definition: objutil.cpp:1644

space_comma
#define space_comma
Definition: objutil.cpp:816

strLinkBaseGeneOntology
const char * strLinkBaseGeneOntology
Definition: objutil.cpp:1658

space_semicolon
#define space_semicolon
Definition: objutil.cpp:819

TryToSanitizeHtmlList
void TryToSanitizeHtmlList(std::list< std::string > &strs)
Definition: objutil.cpp:1871

s_FindModelEvidanceUop
const CUser_object * s_FindModelEvidanceUop(const CUser_object &uo)
Definition: objutil.cpp:1401

s_IsWholeWord
static bool s_IsWholeWord(const string &str, size_t pos)
Definition: objutil.cpp:1040

ExpandTildes
void ExpandTildes(string &s, ETildeStyle style)
Definition: objutil.cpp:152

DateToString
void DateToString(const CDate &date, string &str, EDateToString format_choice)
Definition: objutil.cpp:1238

TrimSpaces
void TrimSpaces(string &str, size_t indent)
Definition: objutil.cpp:385

JoinString
void JoinString(string &to, const string &prefix, const string &str, bool noRedundancy)
Definition: objutil.cpp:1050

semicolon_space
#define semicolon_space
Definition: objutil.cpp:821

bracket_space
#define bracket_space
Definition: objutil.cpp:818

strLinkBaseLatLon
const char * strLinkBaseLatLon
Definition: objutil.cpp:1656

strLinkBaseTransTable
const char * strLinkBaseTransTable
Definition: objutil.cpp:1646

GetDeltaSeqSummary
void GetDeltaSeqSummary(const CBioseq_Handle &seq, SDeltaSeqSummary &summary)
Definition: objutil.cpp:1257

TrimSpacesAndJunkFromEnds
bool TrimSpacesAndJunkFromEnds(string &str, bool allow_ellipsis)
Definition: objutil.cpp:475

IsPartOfUrl
bool IsPartOfUrl(const string &sentence, size_t pos)
Definition: objutil.cpp:67

strDocLink
const char * strDocLink
Definition: objutil.cpp:1667

ConvertQuotesNotInHTMLTags
bool ConvertQuotesNotInHTMLTags(string &str)
Definition: objutil.cpp:1774

IsValidAccession
bool IsValidAccession(const string &accn, EAccValFlag flag)
Definition: objutil.cpp:1227

GetAAName
const char * GetAAName(unsigned char aa, bool is_ascii)
Definition: objutil.cpp:1559

twocommas
#define twocommas
Definition: objutil.cpp:813

twosemicolons
#define twosemicolons
Definition: objutil.cpp:815

strLinkBasePubmed
const char * strLinkBasePubmed
Definition: objutil.cpp:1648

strLinkBaseUniProt
const char * strLinkBaseUniProt
Definition: objutil.cpp:1664

space_bracket
#define space_bracket
Definition: objutil.cpp:817

CommentHasSuspiciousHtml
bool CommentHasSuspiciousHtml(const string &str)
Definition: objutil.cpp:1879

twospaces
#define twospaces
Definition: objutil.cpp:814

AddPeriod
void AddPeriod(string &str)
Definition: objutil.cpp:377

kAANames
static const char * kAANames[]
Definition: objutil.cpp:1552

CompressSpaces
string & CompressSpaces(string &str, const bool trim_beginning, const bool trim_end)
Definition: objutil.cpp:416

s_GetModelEvidance
bool s_GetModelEvidance(const CBioseq_Handle &bsh, SModelEvidance &me)
Definition: objutil.cpp:1441

GetTechString
const string & GetTechString(int tech)
Definition: objutil.cpp:1364

CleanAndCompress
void CleanAndCompress(string &dest, const CTempString &instr)
Definition: objutil.cpp:823

GetResolveOrder
EResolveOrder GetResolveOrder(CScope &scope, const CSeq_id_Handle &mrna, const CSeq_id_Handle &prot, CBioseq_Handle &mrna_bsh, CBioseq_Handle &prot_bsh)
Definition: objutil.cpp:1571

strLinkBaseUSPTO
const char * strLinkBaseUSPTO
Definition: objutil.cpp:1662

s_IsValidDotVersion
static bool s_IsValidDotVersion(const string &accn)
Definition: objutil.cpp:1208

GetModelEvidance
bool GetModelEvidance(const CBioseq_Handle &bsh, SModelEvidance &me)
Definition: objutil.cpp:1534

s_RunOfStars
static bool s_RunOfStars(string &s, SIZE_TYPE start, SIZE_TYPE length)
Definition: objutil.cpp:137

ConvertQuotes
void ConvertQuotes(string &str)
Definition: objutil.cpp:240

strLinkBaseExpasy
const char * strLinkBaseExpasy
Definition: objutil.cpp:1650

objutil.hpp

ETildeStyle
ETildeStyle
Definition: objutil.hpp:47

eTilde_newline
@ eTilde_newline
Definition: objutil.hpp:50

eTilde_tilde
@ eTilde_tilde
Definition: objutil.hpp:48

eTilde_space
@ eTilde_space
Definition: objutil.hpp:49

eTilde_comment
@ eTilde_comment
Definition: objutil.hpp:51

eTilde_note
@ eTilde_note
Definition: objutil.hpp:52

EAccValFlag
EAccValFlag
Definition: objutil.hpp:95

eValidateAccDotVer
@ eValidateAccDotVer
Definition: objutil.hpp:97

EResolveOrder
EResolveOrder
Definition: objutil.hpp:160

eResolve_RnaFirst
@ eResolve_RnaFirst
Definition: objutil.hpp:162

eResolve_ProtFirst
@ eResolve_ProtFirst
Definition: objutil.hpp:163

eResolve_NotFound
@ eResolve_NotFound
Definition: objutil.hpp:161

EDateToString
EDateToString
Definition: objutil.hpp:103

eDateToString_cit_sub
@ eDateToString_cit_sub
Definition: objutil.hpp:105

eDateToString_patent
@ eDateToString_patent
Definition: objutil.hpp:106

scope.hpp

seqdesc_ci.hpp

seqport_util.hpp

sequence.hpp

indent
string indent("     ")

l
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Definition: sljitNativeS390X.c:601

DEFINE_STATIC_ARRAY_MAP_WITH_COPY
#define DEFINE_STATIC_ARRAY_MAP_WITH_COPY(Type, Var, Array)
Definition: static_set.hpp:894

strsearch.hpp
String search utilities.

SDeltaSeqSummary
Definition: objutil.hpp:112

SDeltaSeqSummary::num_segs
size_t num_segs
Definition: objutil.hpp:114

SDeltaSeqSummary::text
string text
Definition: objutil.hpp:113

SDeltaSeqSummary::num_gaps
size_t num_gaps
Definition: objutil.hpp:115

SDeltaSeqSummary::num_faked_gaps
size_t num_faked_gaps
Definition: objutil.hpp:117

SDeltaSeqSummary::residues
size_t residues
Definition: objutil.hpp:116

SModelEvidance
Definition: objutil.hpp:135

SModelEvidance::estEv
bool estEv
Definition: objutil.hpp:142

SModelEvidance::span
TSpanType span
Definition: objutil.hpp:144

SModelEvidance::mrnaEv
bool mrnaEv
Definition: objutil.hpp:141

SModelEvidance::gi
TGi gi
Definition: objutil.hpp:143

SModelEvidance::method
string method
Definition: objutil.hpp:140

SModelEvidance::assembly
list< string > assembly
Definition: objutil.hpp:139

SModelEvidance::name
string name
Definition: objutil.hpp:138

_ASSERT
#define _ASSERT
Definition: test_assert_impl.h:173

result
else result
Definition: token2.c:20

const
#define const
Definition: zconf.h:232