// Character-range predicates.
//   IS_LOWER(c) is true when c lies between 'a' and 'z' inclusive.
//   IS_UPPER(c) is true when c lies between 'A' and 'Z' inclusive.
// Both are plain range comparisons on the character value (no locale lookup).
// NOTE: the argument is expanded twice, so do not pass an expression with
// side effects (e.g. IS_LOWER(*p++)).
66 #define IS_LOWER(c) ('a'<=(c) && (c)<='z')
67 #define IS_UPPER(c) ('A'<=(c) && (c)<='Z')
69 using namespace sequence;
81 string::size_type first_good_char_pos =
str.find_first_not_of(
" ;,");
82 if( first_good_char_pos == string::npos ) {
86 }
else if( first_good_char_pos > 0 ) {
87 copy(
str.begin() + first_good_char_pos,
str.end(),
str.begin() );
88 str.resize(
str.length() - first_good_char_pos );
95 string::size_type last_good_char_pos =
str.find_last_not_of(
" ;,");
96 _ASSERT( last_good_char_pos != string::npos );
97 if( last_good_char_pos == (
str.length() - 1) ) {
100 }
else if(
str[last_good_char_pos+1] ==
';' ) {
106 string::size_type last_ampersand_pos =
str.find_last_of(
"& ,", last_good_char_pos );
107 if( last_ampersand_pos == string::npos ) {
109 str.resize( last_good_char_pos + 1 );
112 switch(
str[last_ampersand_pos] ) {
115 if( (last_good_char_pos + 2) ==
str.length() ) {
120 str.resize( last_good_char_pos + 2 );
127 str.resize( last_good_char_pos + 1 );
135 str.resize( last_good_char_pos + 1 );
157 bool isPeriod =
false;
158 bool isTilde =
false;
159 int start_of_junk_pos = (
int)
str.length() - 1;
160 for( ; start_of_junk_pos >= 0 ; --start_of_junk_pos ) {
161 const char ch =
str[start_of_junk_pos];
162 if (ch <=
' ' || ch ==
'.' || ch ==
',' || ch ==
'~' || ch ==
';') {
166 isPeriod = (isPeriod || ch ==
'.');
167 isTilde = (isTilde || ch ==
'~');
175 if( start_of_junk_pos < 0 ) {
176 start_of_junk_pos = 0;
179 bool changed =
false;
182 if ( start_of_junk_pos < (
int)
str.length() ) {
185 const char * suffix =
"";
187 const int chars_in_junk = ( (
int)
str.length() - start_of_junk_pos );
192 if ( allow_ellipses && (chars_in_junk >= 3) &&
193 str[start_of_junk_pos+1] ==
'.' &&
str[start_of_junk_pos+2] ==
'.' ) {
196 }
else if (isTilde ) {
198 if (
str[start_of_junk_pos] ==
'~' ) {
199 const bool doubleTilde = ( (chars_in_junk >= 2) &&
str[start_of_junk_pos+1] ==
'~' );
200 suffix = ( doubleTilde ?
"~~" :
"" );
203 if( suffix[0] !=
'\0' ) {
204 if( 0 !=
str.compare( start_of_junk_pos, INT_MAX, suffix) ) {
205 str.erase( start_of_junk_pos );
209 }
else if ( start_of_junk_pos < (
int)
str.length() ) {
210 str.erase( start_of_junk_pos );
216 string::iterator input_iter =
str.begin();
217 while ( input_iter !=
str.end() && *input_iter <=
' ') {
220 if( input_iter !=
str.begin() ) {
221 str.erase(
str.begin(), input_iter );
231 static string whites(
" \t\n\r");
232 bool changed =
false;
234 if (tilde1 ==
NPOS) {
237 SIZE_TYPE tilde2 =
str.find_first_not_of(whites, tilde1 + 1);
238 while (tilde2 !=
NPOS) {
239 if (
str[tilde2] ==
'~') {
240 if ( tilde2 > tilde1 + 1) {
242 str.erase(tilde1+1, tilde2 - tilde1 - 1);
251 tilde1 =
str.find(
'~', tilde2 + 1);
252 if (tilde1 ==
NPOS) {
256 tilde2 =
str.find_first_not_of(whites, tilde1 + 1);
265 bool changed =
false;
278 size_t pos, next_pos;
281 while (pos != string::npos) {
283 bool has_space =
false;
284 while (next_pos <
str.length() && (
str[next_pos] ==
';' ||
str[next_pos] ==
' ' ||
str[next_pos] ==
'\t')) {
285 if (
str[next_pos] ==
' ') {
290 if (next_pos == pos + 1 || (has_space && next_pos == pos + 2)) {
293 }
else if (next_pos ==
str.length()) {
300 str =
str.substr(0, pos + 1) +
" " +
str.substr(next_pos);
302 str =
str.substr(0, pos + 1) +
str.substr(next_pos);
// Two-character pair codes. Each macro packs a pair of adjacent characters
// into one integer: the first character of the pair is shifted into the
// high byte (<< 8) and the second character occupies the low byte.
// The `two_chars` accumulator used further down is built the same way
// (shifted left by 8 and OR'ed with the next character).
// NOTE(review): presumably `two_chars` is compared against these constants to
// detect the pairs below - the comparison itself is not visible here; confirm.
//   twocommas        ",,"      twospaces        "  "      twosemicolons  ";;"
//   space_comma      " ,"      space_semicolon  " ;"
//   comma_space      ", "      semicolon_space  "; "
//   space_bracket    " )"  (space followed by a closing parenthesis)
//   bracket_space    "( "  (opening parenthesis followed by a space)
309 #define twocommas ((',') << 8 | (','))
310 #define twospaces ((' ') << 8 | (' '))
311 #define twosemicolons ((';') << 8 | (';'))
312 #define space_comma ((' ') << 8 | (','))
313 #define space_bracket ((' ') << 8 | (')'))
314 #define bracket_space (('(') << 8 | (' '))
315 #define space_semicolon ((' ') << 8 | (';'))
316 #define comma_space ((',') << 8 | (' '))
317 #define semicolon_space ((';') << 8 | (' '))
321 if (
val.length() == 0)
return false;
323 char *
str =
new char[
sizeof(char) * (
val.length() + 1)];
334 unsigned short two_chars;
345 while (curr !=
'\0') {
349 two_chars = (two_chars << 8) |
next;
370 while (
next ==
' ' ||
next ==
',') {
380 while (
next ==
' ' ||
next ==
';') {
389 while (
next ==
' ' ||
next ==
',') {
398 while (
next ==
' ' ||
next ==
';') {
415 if (
str[0] !=
'\0') {
454 printf(
"Use new string\n");
467 if (
val.length() == 0)
return false;
469 char *
str =
new char[
sizeof(char) * (
val.length() + 1)];
480 if (ch !=
'\0' && (ch <=
' ' || ch ==
';' || ch ==
',')) {
481 while (ch !=
'\0' && (ch <=
' ' || ch ==
';' || ch ==
',')) {
502 else if (ch <=
' ') {
508 else if (ch ==
';') {
513 else if (ch ==
',') {
536 printf(
"Use new string\n");
557 if (!
isspace((
unsigned char)(*it))) {
574 const CSeq_loc& loc1,
575 const CSeq_loc& loc2,
634 if (abbrev.length() >= 3) {
642 if (abbrev.length() == 1) {
657 return dbt1->
Compare(*dbt2) < 0;
664 return dbt1->
Compare(*dbt2) == 0;
User-defined methods of the data storage class.
int Compare(const CDbtag &dbt2) const
CGetSeqLocFromStringHelper_ReadLocFromText(CScope *scope)
virtual CRef< CSeq_loc > Seq_loc_Add(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags)
bool RemoveSpacesBetweenTildes(string &str)
remove white space between pairs of tildes.
bool CleanDoubleQuote(string &str)
Change double to single quotes.
bool CleanVisString(string &str)
CRef< CSeq_loc > ReadLocFromText(const string &text, const CSeq_id *id, CScope *scope)
bool s_OrgrefSynCompare(const string &syn1, const string &syn2)
bool s_DbtagEqual(const CRef< CDbtag > &dbt1, const CRef< CDbtag > &dbt2)
void TrimInternalSemicolons(string &str)
remove duplicate internal semicolons.
bool s_DbtagCompare(const CRef< CDbtag > &dbt1, const CRef< CDbtag > &dbt2)
char x_ValidAminoAcid(string_view abbrev)
bool s_OrgrefSynEqual(const string &syn1, const string &syn2)
struct proteinabbrev ProteinAbbrevData
static ProteinAbbrevData abbreviation_list[]
bool RemoveSpaces(string &str)
remove all spaces from a string
bool CleanVisStringJunk(string &str, bool allow_ellipses)
bool Asn2gnbkCompressSpaces(string &val)
weird space compression from C Toolkit
bool TrimSpacesSemicolonsAndCommas(string &val)
Include a standard set of the NCBI C++ Toolkit most basic headers.
std::ofstream out("events_result.xml")
main entry point for tests
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * str(char *buf, int n)
constexpr size_t ArraySize(const Element(&)[Size])
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
CRef< CSeq_loc > Seq_loc_Add(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Add two seq-locs.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
NCBI_NS_STD::string::size_type SIZE_TYPE
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
unsigned int
A callback function used to compare two keys in a database.
static void text(MDB_val *v)
std::istream & in(std::istream &in_, double &x_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
CRef< CSeq_loc > GetSeqLocFromString(const string &text, const CSeq_id *id, CGetSeqLocFromStringHelper *helper)
static Uint4 letter(char c)