NCBI C++ ToolKit
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: utilfun.h 102128 2024-04-05 17:42:12Z stakhovv $
2  * ===========================================================================
3  *
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * File Name: utilfun.h
27  *
28  * Author: Karl Sirotkin, Hsiu-Chuan Chen
29  *
30  * File Description:
31  * Utility functions for parser.
32  *
33  */
35 #ifndef _UTILFUN_
36 #define _UTILFUN_
38 /* for unknown Moltype
39  */
40 #define Unknown 0
42 /* for Unknown Keyword type
43  */
44 #define ParFlat_UNKW 999
48 #include <objmgr/scope.hpp>
51 #include "ftablock.h"
52 #include "entry.h"
58 /**********************************************************/
60 /* Routines related to tokenizing a string
61  */
65 bool ParseAccessionRange(TokenStatBlk* tsbp, unsigned skip);
66 bool ParseAccessionRange(list<string>& tokens, unsigned skip);
67 void UnwrapAccessionRange(const objects::CGB_block::TExtra_accessions& extra_accs, objects::CGB_block::TExtra_accessions& hist);
69 /* Return array position of the matched length of string in array_string.
70  * Return -1 if no match.
71  */
72 Int2 fta_StringMatch(const Char** array, const Char* text);
74 /* Return array position of the matched length of string (ignoring case)
75  * in array_string.
76  * Return -1 if no match.
77  */
78 Int2 StringMatchIcase(const Char** array, const Char* text);
80 char* StringIStr(const Char* where, const Char* what);
82 /* Return array position of the string in the array_string.
83  * Return -1 if no match.
84  */
85 Int2 MatchArrayString(const Char** array, const Char* text);
86 Int2 MatchArrayIString(const Char** array, const Char* text);
88 /* Return the array position of the array_string that occurs as a
89  * substring of "text", if any.
90  * Return -1 if no match.
91  */
92 Int2 MatchArraySubString(const Char** array, const Char* text);
93 Int2 MatchArrayISubString(const Char** array, const Char* text);
95 /* Return a string in which newlines are replaced with blanks and "XX" line data is skipped.
96  */
97 string GetBlkDataReplaceNewLine(string_view, Int2 indent);
99 /* Delete any trailing ' ', '\n', '\\', ',', ';', '~', '.', ':'
100  * characters.
101  */
102 void CleanTailNoneAlphaChar(char* str);
105 char* PointToNextToken(char* ptr);
107 /* Return the current token that ptr points to; ptr will point to the
108  * next token after the routine returns.
109  */
110 char* GetTheCurrentToken(char** ptr);
112 /* Search for the character letter.
113  * Return NULL if not found; otherwise, return a pointer to the first
114  * occurrence of the character.
115  */
116 char* SrchTheChar(char* bptr, char* eptr, Char letter);
118 /* Search for the string.
119  * Return NULL if not found; otherwise, return a pointer to the first
120  * occurrence of the string.
121  */
122 char* SrchTheStr(char* bptr, char* eptr, const char* str);
124 void CpSeqId(InfoBioseq* ibp, const objects::CSeq_id& id);
126 // int SrchKeyword(const string& ptr, const vector<string>& keywordList);
127 int SrchKeyword(const CTempString& ptr, const vector<string>& keywordList);
128 bool CheckLineType(char* ptr, Int4 line, const vector<string>& keywordList, bool after_origin);
129 char* SrchNodeType(DataBlk* entry, Int4 type, size_t* len);
130 char* xSrchNodeType(const DataBlk& entry, Int4 type, size_t* len);
131 string xGetNodeData(const DataBlk& entry, int nodeType);
132 DataBlk* TrackNodeType(const DataBlk& entry, Int2 type);
133 const Section* xTrackNodeType(const Entry&, int type);
134 ValNode* ConstructValNode(objects::CSeq_id::E_Choice choice, const char* data);
135 bool fta_is_tpa_keyword(const char* str);
136 bool fta_tpa_keywords_check(const TKeywordList& kwds);
137 bool fta_is_tsa_keyword(const char* str);
138 bool fta_is_tls_keyword(const char* str);
141 bool fta_check_mga_keywords(objects::CMolInfo& mol_info, const TKeywordList& kwds);
142 void fta_StringCpy(char* dst, const char* src);
144 void fta_keywords_check(const char* str, bool* estk, bool* stsk, bool* gssk, bool* htck, bool* flik, bool* wgsk, bool* tpak, bool* envk, bool* mgak, bool* tsak, bool* tlsk);
146 void fta_remove_keywords(int tech, TKeywordList& kwds);
153 bool IsCancelled(const TKeywordList& keywords);
154 bool HasHtg(const TKeywordList& keywords);
155 void RemoveHtgPhase(TKeywordList& keywords);
156 bool HasHtc(const TKeywordList& keywords);
157 bool SetTextId(Uint1 seqtype, objects::CSeq_id& seqId, objects::CTextseq_id& textId);
159 void check_est_sts_gss_tpa_kwds(ValNodePtr kwds, size_t len, IndexblkPtr entry, bool tpa_check, bool& specialist_db, bool& inferential, bool& experimental, bool& assembly);
160 void xCheckEstStsGssTpaKeywords(
161  const std::list<std::string> keywordList,
162  bool tpa_check,
163  IndexblkPtr entry);
165 namespace objects
166 {
167  class CScope;
168 }
170 objects::CScope& GetScope();
175 #endif
User-defined methods of the data storage class.
CScope –.
Definition: scope.hpp:92
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
std::list< std::string > TKeywordList
Definition: ftablock.h:166
static const char * str(char *buf, int n)
Definition: stats.c:84
char data[12]
Definition: iconv.c:80
Definition: cgiapp.hpp:687
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
char Char
Alias for char.
Definition: ncbitype.h:93
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
Define ncbi namespace.
Definition: ncbistl.hpp:100
int len
static void text(MDB_val *v)
Definition: mdb_dump.c:62
const CharType(& source)[N]
Definition: pointer.h:1149
The Object manager core.
static const char delimiter[]
string indent(" ")
Definition: entry.h:57
Definition: entry.h:13
Definition: type.c:6
Int2 MatchArraySubString(const Char **array, const Char *text)
Definition: utilfun.cpp:656
Int2 MatchArrayIString(const Char **array, const Char *text)
Definition: utilfun.cpp:625
bool HasHtg(const TKeywordList &keywords)
Definition: utilfun.cpp:1663
int SrchKeyword(const CTempString &ptr, const vector< string > &keywordList)
Definition: utilfun.cpp:986
bool HasHtc(const TKeywordList &keywords)
Definition: utilfun.cpp:1692
char * SrchTheChar(char *bptr, char *eptr, Char letter)
Definition: utilfun.cpp:847
bool fta_tls_keywords_check(const TKeywordList &kwds, Parser::ESource source)
Definition: utilfun.cpp:1225
void CpSeqId(InfoBioseq *ibp, const objects::CSeq_id &id)
void RemoveHtgPhase(TKeywordList &keywords)
Definition: utilfun.cpp:1677
bool fta_is_tsa_keyword(const char *str)
Definition: utilfun.cpp:1268
bool fta_is_tls_keyword(const char *str)
Definition: utilfun.cpp:1276
bool CheckLineType(char *ptr, Int4 line, const vector< string > &keywordList, bool after_origin)
Definition: utilfun.cpp:999
void xCheckEstStsGssTpaKeywords(const std::list< std::string > keywordList, bool tpa_check, IndexblkPtr entry)
bool SetTextId(Uint1 seqtype, objects::CSeq_id &seqId, objects::CTextseq_id &textId)
void fta_remove_tsa_keywords(TKeywordList &kwds, Parser::ESource source)
Definition: utilfun.cpp:1366
objects::CScope & GetScope()
void check_est_sts_gss_tpa_kwds(ValNodePtr kwds, size_t len, IndexblkPtr entry, bool tpa_check, bool &specialist_db, bool &inferential, bool &experimental, bool &assembly)
Definition: utilfun.cpp:1460
void fta_remove_tpa_keywords(TKeywordList &kwds)
Definition: utilfun.cpp:1352
char * SrchNodeType(DataBlk *entry, Int4 type, size_t *len)
Int2 fta_StringMatch(const Char **array, const Char *text)
Definition: utilfun.cpp:549
bool fta_is_tpa_keyword(const char *str)
Definition: utilfun.cpp:1259
char * StringIStr(const Char *where, const Char *what)
Definition: utilfun.cpp:674
void fta_remove_tls_keywords(TKeywordList &kwds, Parser::ESource source)
Definition: utilfun.cpp:1381
char * xSrchNodeType(const DataBlk &entry, Int4 type, size_t *len)
Definition: utilfun.cpp:1052
string xGetNodeData(const DataBlk &entry, int nodeType)
Definition: utilfun.cpp:1066
CRef< objects::CDate_std > get_full_date(const Char *s, bool is_ref, Parser::ESource source)
Definition: utilfun.cpp:916
char * GetTheCurrentToken(char **ptr)
Definition: utilfun.cpp:815
void fta_keywords_check(const char *str, bool *estk, bool *stsk, bool *gssk, bool *htck, bool *flik, bool *wgsk, bool *tpak, bool *envk, bool *mgak, bool *tsak, bool *tlsk)
Definition: utilfun.cpp:1284
Int2 StringMatchIcase(const Char **array, const Char *text)
Definition: utilfun.cpp:576
void fta_StringCpy(char *dst, const char *src)
Definition: utilfun.cpp:1585
void fta_remove_keywords(int tech, TKeywordList &kwds)
Definition: utilfun.cpp:1321
DataBlk * TrackNodeType(const DataBlk &entry, Int2 type)
Definition: utilfun.cpp:1083
void fta_remove_mag_keywords(TKeywordList &kwds)
Definition: utilfun.cpp:1410
void CleanTailNoneAlphaChar(char *str)
Definition: utilfun.cpp:785
bool IsCancelled(const TKeywordList &keywords)
Definition: utilfun.cpp:1652
ValNode * ConstructValNode(objects::CSeq_id::E_Choice choice, const char *data)
Int2 MatchArrayString(const Char **array, const Char *text)
Definition: utilfun.cpp:607
const Section * xTrackNodeType(const Entry &, int type)
Definition: utilfun.cpp:1097
void FreeTokenblk(TokenBlk *tbp)
bool fta_tsa_keywords_check(const TKeywordList &kwds, Parser::ESource source)
Definition: utilfun.cpp:1191
TokenStatBlk * TokenString(const char *str, Char delimiter)
Definition: utilfun.cpp:489
void fta_remove_env_keywords(TKeywordList &kwds)
Definition: utilfun.cpp:1396
bool fta_tpa_keywords_check(const TKeywordList &kwds)
Definition: utilfun.cpp:1109
char * PointToNextToken(char *ptr)
Definition: utilfun.cpp:795
void UnwrapAccessionRange(const objects::CGB_block::TExtra_accessions &extra_accs, objects::CGB_block::TExtra_accessions &hist)
void FreeTokenstatblk(TokenStatBlk *tsbp)
string GetBlkDataReplaceNewLine(string_view, Int2 indent)
Definition: utilfun.cpp:732
bool fta_check_mga_keywords(objects::CMolInfo &mol_info, const TKeywordList &kwds)
bool ParseAccessionRange(TokenStatBlk *tsbp, unsigned skip)
void CleanTailNoneAlphaCharInString(std::string &str)
char * SrchTheStr(char *bptr, char *eptr, const char *str)
Definition: utilfun.cpp:867
Int2 MatchArrayISubString(const Char **array, const Char *text)
Definition: utilfun.cpp:706
static Uint4 letter(char c)
Modified on Sun Apr 14 05:25:12 2024 by rev. 669887