45 "Clean up an AGP file:\n"
46 "https://www.ncbi.nlm.nih.gov/assembly/agp/AGP_Specification/\n"
48 "USAGE: agp_renumber <in.agp >out.agp\n"
50 " - Recalculate the object begin and end coordinates from\n"
51 " the length of the component span or gap length.\n"
52 " - Renumber the part numbers for each object.\n"
53 " - Lowercase gap type and linkage.\n"
54 " - Reorder linkage evidence terms: paired-ends;align_genus;align_xgenus;align_trnscpt;within_clone;clone_contig;map;strobe;pcr\n"
55 " - Reformat white space to conform to the AGP format specification:\n"
56 " - add missing tabs at the ends of gap lines;\n"
57 " - drop blank lines;\n"
58 " - remove extra tabs and spaces at the end of lines;\n"
59 " - add a missing line separator at the end of the file;\n"
60 " - replace spaces with tabs (except in comments).\n";
238 bool had_space =
false;
239 bool had_extra_tab=
false;
240 bool no_eol_at_eof=
false;
241 bool bad_case_gap =
false;
249 char component_type=0;
262 if( tab_count==9 &&
i<s.size()-1 && s[
i+1]==
'#' ) {
282 if(prev_ch==
'\t' && tab_count==4) {
285 if( (component_type==
'N' || component_type==
'U') &&
286 (tab_count==6 || tab_count==7) &&
tolower(ch)!=ch
288 ch=
tolower(ch); bad_case_gap=
true;
293 *
buf <<
'\t' << s.substr(
i);
316 if(
in.eof()) no_eol_at_eof=
true;
327 cerr << renum.
GetErrorMessage()<<
"\nRenumbering not completed because of errors.\n";
331 if(had_space ) cerr <<
"Spaces converted to tabs.\n";
332 if(had_extra_tab ) cerr <<
"Extra tabs removed.\n";
336 if(no_eol_at_eof ) cerr <<
"Line break added at the end of file.\n";
337 if(bad_case_gap ) cerr <<
"Gap type/linkage converted to lower case.\n";
338 if(renum.
reordered_ln_ev) cerr <<
"Linkage evidence terms reordered.\n";
342 cerr << renum.
no_renum_objs <<
" object(s) did not need renumbering.\n";
344 cerr <<
"All lines have proper object_beg, object_end, part_number.\n";
354 int main(
int argc,
char* argv[])
359 else if(argv[1][0]==
'-' || argc > 1+1) {
366 cerr <<
"Error - cannot open for reading: " << argv[1] <<
"\n";
int main(int argc, char *argv[])
int ProcessStream(istream &in, ostream &out)
@ eAgpVersion_auto
auto-detect using the first gap line
virtual void Msg(int code, const string &details, int appliesTo=fAtThisLine)
static const char * GetMsg(int code)
Detects scaffolds, object boundaries, errors that involve 2 consecutive lines, and is intended as a s...
CAgpErr * GetErrorHandler()
virtual int Finalize()
This is called at the end of the file, usually automatically but can be called manually if the automa...
CRef< CAgpRow > m_this_row
virtual string GetErrorMessage(const string &filename=NcbiEmptyString)
Return a string with one (or two, depending on error) source line(s) on which the error occurred,...
virtual int ReadStream(CNcbiIstream &is, EFinalize eFinalize=eFinalize_Yes)
Read an AGP file from the given input stream.
bool ProcessThisRow()
Invoked from ReadStream(), after the row has been parsed, and seldom needs to be invoked by user.
void SetErrorHandler(CAgpErr *arg)
CRef< CCustomErrorHandler > custom_err
virtual void OnObjectChange()
virtual void OnGapOrComponent()
set< string > m_obj_names
CAgpRenumber(ostream &out)
string ToString(bool reorder_linkage_evidences=false)
static bool IsGap(char c)
static bool MustRenumber(int code)
virtual void Msg(int code, int appliesTo=fAtThisLine)
virtual void Msg(int code, const string &details, int appliesTo=fAtThisLine)
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
iterator_bool insert(const value_type &val)
std::ofstream out("events_result.xml")
main entry point for tests
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
void Reset(void)
Reset reference object.
CNcbiIstream & NcbiGetline(CNcbiIstream &is, string &str, char delim, string::size_type *count=NULL)
Read from "is" to "str" up to the delimiter symbol "delim" (or EOF)
CNcbistrstream_Base< IO_PREFIX::ostrstream, IOS_BASE::out > CNcbiOstrstream
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
NCBI_NS_STD::string::size_type SIZE_TYPE
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
std::istream & in(std::istream &in_, double &x_)