1 #ifndef OBJTOOLS_READERS___AGP_UTIL__HPP
2 #define OBJTOOLS_READERS___AGP_UTIL__HPP
94 return Ref(
new CAgpRow(arg, agp_version, reader));
100 return Ref(
new CAgpRow(agp_version, reader) );
111 int FromString(
const string& line);
113 string GetErrorMessage();
114 string ToString(
bool reorder_linkage_evidences=
false);
166 eOrientationPlus = 43,
167 eOrientationMinus = 45,
168 eOrientationUnknown = 48,
169 eOrientationIrrelevant = 110
190 eGapYes_count=eGapContamination+1
196 fLinkageEvidence_paired_ends = (1 << 0),
197 fLinkageEvidence_align_genus = (1 << 1),
198 fLinkageEvidence_align_xgenus = (1 << 2),
199 fLinkageEvidence_align_trnscpt= (1 << 3),
200 fLinkageEvidence_within_clone = (1 << 4),
201 fLinkageEvidence_clone_contig = (1 << 5),
202 fLinkageEvidence_map = (1 << 6),
203 fLinkageEvidence_strobe = (1 << 7),
204 fLinkageEvidence_pcr = (1 << 8),
205 fLinkageEvidence_proximity_ligation = (1 << 9),
207 fLinkageEvidence_HIGHEST_BIT_MASK = fLinkageEvidence_proximity_ligation,
209 fLinkageEvidence_unspecified = 0,
210 fLinkageEvidence_INVALID = -1,
211 fLinkageEvidence_na = -2
224 return c==
'N' || c==
'U';
229 return c==
'A' || c==
'D' || c==
'P';
233 return gap_type==eGapCentromere || gap_type==eGapTelomere ||
234 gap_type==eGapShort_arm || gap_type==eGapHeterochromatin ||
235 gap_type==eGapContamination;
245 return IsDraftComponent(component_type);
251 if( gap_type==eGapFragment)
return false;
252 return linkage==
false;
257 return GapValidAtObjectEnd(gap_type);
261 static bool CheckComponentEnd(
const string& comp_id,
TAgpPos comp_end,
TAgpLen comp_len,
264 return CheckComponentEnd(GetComponentId(), component_end, comp_len, *m_AgpErr);
268 string LinkageEvidencesToString(
void);
270 static string LinkageEvidenceFlagsToString(
int le);
273 return LinkageEvidenceFlagsToString(linkage_evidence_flags);
275 static const char* le_str(ELinkageEvidence
le);
276 static int str_to_le(
const string&
str);
280 string SubstOldGap(
bool do_subst);
288 int *perror_code,
bool log_errors =
true);
289 int ParseComponentCols(
bool log_errors=
true);
290 int ParseGapCols(
bool log_errors=
true);
293 static const TStr gap_types[eGapCount];
310 void SetErrorHandler(
CAgpErr* arg);
320 return m_agp_version;
353 virtual int ReadStream(
CNcbiIstream& is, EFinalize eFinalize = eFinalize_Yes );
360 return ReadStream(is, (bFinalize ? eFinalize_Yes : eFinalize_No) );
368 virtual int Finalize();
376 virtual string GetErrorMessage(
const string& filename=
NcbiEmptyString);
383 bool ProcessThisRow();
478 void x_CheckPragmaComment(
void);
482 void SetErrorHandler(
CAgpErr* arg);
485 return m_agp_version;
505 virtual void Msg(
int code,
const string& details,
int appliesTo=fAtThisLine);
506 virtual void Msg(
int code,
int appliesTo=fAtThisLine)
513 virtual string GetErrorMessage(
int mask=0xFFFFFFFF);
514 virtual int AppliesTo(
int mask=0xFFFFFFFF);
551 E_Last, E_First=1, E_LastToSkipLine=E_ObjRangeNeComp,
616 G_First = G_InvalidCompId,
622 if(
code==W_ShortGap ||
code==W_AssumingVersion)
return "NOTE";
626 static const char* GetMsg(
int code);
635 static string FormatMessage(
const string&
msg,
const string& details);
655 CODE_First = E_First,
660 CODE_Extended = ((((G_Last / 10 ) + 2) * 10) + 1),
662 CODE_Last=CODE_Extended+20
666 string GetPrintableCode(
int code,
bool strict=
false)
const;
683 const string& filename,
int linenum,
const string& content);
684 static void PrintLineXml(
CNcbiOstream& ostr,
const string& filename,
int linenum,
const string& content,
bool two_lines_involved);
691 static const Uint8 s_StrictModeWarningMask =
692 (
Uint8(1) << (W_ExtraTab -W_First)) |
693 (
Uint8(1) << (W_GapLineMissingCol9 -W_First)) |
694 (
Uint8(1) << (W_NoEolAtEof -W_First)) |
695 (
Uint8(1) << (W_GapLineIgnoredCol9 -W_First)) |
696 (
Uint8(1) << (W_ObjOrderNotNumerical -W_First)) |
697 (
Uint8(1) << (W_GapSizeNot100 -W_First)) |
698 (
Uint8(1) << (W_ShortGap -W_First)) |
699 (
Uint8(1) << (W_CommentsAfterStart -W_First)) |
700 (
Uint8(1) << (W_AssumingVersion -W_First)) |
701 (
Uint8(1) << (W_AGPVersionCommentInvalid -W_First)) |
702 (
Uint8(1) << (W_AGPVersionCommentUnnecessary-W_First)) ;
705 return s_StrictModeWarningMask & (
Uint8(1) << (
code-W_First));
707 const char* ErrorWarningOrNoteEx(
int code);
715 virtual void PrintMessageXml(
CNcbiOstream& ostr,
int code,
const string& details,
int appliesTo);
718 static void PrintTotals(
CNcbiOstream& ostr,
int e_count,
int w_count,
int note_count,
int skipped_count);
721 PrintTotals(ostr, e_count, w_count, 0, skipped_count);
723 static void PrintTotalsXml(
CNcbiOstream& ostr,
int e_count,
int w_count,
int note_count,
int skipped_count);
750 virtual void Msg(
int code,
const string& details,
int appliesTo=fAtThisLine);
751 virtual void Msg(
int code,
int appliesTo=fAtThisLine)
757 virtual void LineDone(
const string& s,
int line_num,
bool invalid_line=
false);
762 void StartFile(
const string& s);
771 string SkipMsg(
const string&
str,
bool skip_other=
false);
774 if(
code>=E_First &&
code<CODE_Last) m_MustSkip[
code] = !skip_other;
777 bool MustSkip(
int code);
785 int CountTotals(
int from,
int to=E_First);
786 int GetCount(EErrCode
code)
const;
795 int m_MsgCount[CODE_Last];
796 char m_MustSkip[CODE_Last];
843 return static_cast<int>(m_InputFiles.size());
848 return m_InputFiles[num-1];
854 m_UpgradedWarnings.insert(
code);
862 return (m_UpgradedWarnings.find(
code) != m_UpgradedWarnings.end());
884 static string GetExpandedPattern(
value_type* p);
@ eAgpVersion_auto
auto-detect using the first gap line
@ eAgpVersion_1_1
AGP spec 1.1.
@ eAgpVersion_2_0
AGP spec 2.0 or later.
Accession naming patterns; find ranges for consecutive digits.
vector< double > TDoubleVec
multimap< int, string > TMapCountToString
Correctly print multiple errors and warnings on consecutive lines; suppress undesired or highly repet...
map< int, string > TMapCcodeToString
void UpgradeToError(EErrCode code)
AutoPtr< CNcbiOstrstream > m_messages
vector< string > m_InputFiles
bool m_two_lines_involved
set< EErrCode > m_UpgradedWarnings
bool TreatAsError(EErrCode code) const
virtual void Msg(int code, int appliesTo=fAtThisLine)
bool TreatAsError(int code) const
AutoPtr< CNcbiOstream > m_out_destroyer
void SkipMsg(int code, bool skip_other=false)
static void PrintTotals(CNcbiOstream &ostr, int e_count, int w_count, int skipped_count)
const string & GetFile(int num)
static bool IsStrictModeWarning(int code)
virtual void Msg(int code, int appliesTo=fAtThisLine)
static const char * ErrorWarningOrNote(int code)
@ W_AGPVersionCommentUnnecessary
@ W_BreakingGapSameCompId
@ W_AGPVersionCommentInvalid
@ W_UnSingleCompNotInFull
@ W_OrientationZeroDeprecated
string m_messages_prev_line
static const TMsgMap sMessageMap
Detects scaffolds, object boundaries, errors that involve 2 consecutive lines, and is intended as a s...
EFinalize
Whether or not the function should call Finalize() when it's done successfully.
CAgpErr * GetErrorHandler()
virtual void OnObjectChange()
CRef< CAgpRow > m_prev_row
CRef< CAgpRow > m_this_row
virtual void OnScaffoldEnd()
EAgpVersion m_agp_version
virtual void OnGapOrComponent()
int ReadStream(CNcbiIstream &is, bool bFinalize)
Deprecated backward-compatibility wrapper.
A container for both the original string column values (Get*() methods) and the values converted to i...
bool CheckComponentEnd(TAgpLen comp_len)
string & GetComponentId()
bool GapValidAtObjectEnd() const
static CRef< CAgpRow > New(CAgpErr *arg, EAgpVersion agp_version=eAgpVersion_auto, CAgpReader *reader=nullptr)
string LinkageEvidenceFlagsToString()
CAgpErr * GetErrorHandler()
int linkage_evidence_flags
a bit map which holds summary of info in linkage_evidences.
map< string, EGap > TMapStrEGap
static bool IsGap(char c)
bool GapEndsScaffold() const
string & GetOrientation()
static bool GapValidAtObjectEnd(EGap gap_type)
EAgpVersion m_agp_version
static bool IsDraftComponent(char c)
string & GetComponentType()
TLinkageEvidenceVec linkage_evidences
static const char * GapTypeToString(int i)
CRef< CAgpRow > Clone(void) const
static CRef< CAgpRow > New(EAgpVersion agp_version=eAgpVersion_auto, CAgpReader *reader=nullptr)
string & GetComponentEnd()
static CSafeStatic< TMapStrEGap > gap_type_codes
string & GetComponentBeg()
string & GetLinkageEvidence()
vector< ELinkageEvidence > TLinkageEvidenceVec
Might have duplicates, and is empty on error or if there are no actual linkage evidences (e....
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
container_type::iterator iterator
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
std::ofstream out("events_result.xml")
main entry point for tests
static unsigned int line_num
static const char * str(char *buf, int n)
unsigned int TSeqPos
Type for sequence locations and lengths.
@ eNoOwnership
No ownership is assumed.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
uint64_t Uint8
8-byte (64-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
NCBI_NS_STD::string::size_type SIZE_TYPE
enum ENcbiOwnership EOwnership
Ownership relations between objects.
#define NCBI_XOBJREAD_EXPORT
double value_type
The numeric datatype used by the parser.
Static variables safety - create on demand, destroy on application termination.
Multi-threading – mutexes; rw-locks; semaphore.
bool le(T x_, T y_, T round_)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
string ToString(const wxRect &rc)