94 string GetName()
const {
// Returns the fixed identifier string of this aligner; the value is a constant literal.
return "file_load_aligner"; }
105 size_t Pos = In.tellg();
109 NewResults->Insert(Temp);
110 cerr <<
"FileLoad Aligner: 1 Seq-align" << endl;
118 NewResults->Insert(*Temp);
119 cerr <<
"FileLoad Aligner: " << Temp->
Get().size() <<
" Seq-align-set" << endl;
128 TempSet->
Set().insert(TempSet->
Set().end(), Temp->
SetData().SetAlign().begin(), Temp->
SetData().SetAlign().end());
129 NewResults->Insert(*TempSet);
131 cerr <<
"FileLoad Aligner: " << TempSet->
Get().size() <<
" Seq-annot" << endl;
163 const string& Category);
166 const string& Category,
196 "Generic app for creating an NgAlign "
197 "run, and running it.");
199 arg_desc->AddFlag(
"info",
"info level logging");
201 arg_desc->AddDefaultKey(
"o",
"OutputFile",
205 arg_desc->AddFlag(
"b",
"Output binary ASN.1");
207 arg_desc->AddFlag(
"dup",
"allow dupes");
209 arg_desc->AddKey(
"run",
"InputFile",
"ngalign run ini file",
212 arg_desc->AddOptionalKey(
"asn_cache",
"string",
"comma seperated paths",
214 arg_desc->AddFlag(
"G",
"No genbank");
216 arg_desc->AddOptionalKey(
"query",
"string",
"single seq-id",
218 arg_desc->AddOptionalKey(
"subject",
"string",
"single seq-id",
220 arg_desc->AddOptionalKey(
"qidlist",
"file",
"seq-id list file",
222 arg_desc->AddOptionalKey(
"sidlist",
"file",
"seq-id list file",
224 arg_desc->AddOptionalKey(
"idlist",
"file",
"fasta seq-id list file",
226 arg_desc->AddOptionalKey(
"qloclist",
"file",
"seq-loc list file",
228 arg_desc->AddOptionalKey(
"sloclist",
"file",
"seq-loc list file",
230 arg_desc->AddOptionalKey(
"blastdb",
"blastdb",
"Blastdb Name",
232 arg_desc->AddOptionalKey(
"softfilter",
"string",
"blastdb soft filter, int or string",
234 arg_desc->AddOptionalKey(
"fasta",
"file",
"fasta file",
237 arg_desc->AddOptionalKey(
"seqentry",
"file",
"seq-entry file",
239 arg_desc->AddOptionalKey(
"agp",
"file",
"agp file",
242 arg_desc->AddOptionalKey(
"gc",
"file",
"gencoll asn.1 Assembly, for subject side. Allows for seperate ranking by assembly-unit.",
245 arg_desc->AddDefaultKey(
"batch",
"int",
"batch size for loaded ids",
248 arg_desc->AddOptionalKey(
"nohit",
"outfile",
"List of nohit IDs",
251 arg_desc->AddOptionalKey(
"filters",
"string",
252 "semi-colon seperated list of filters, overrides the ini file",
254 arg_desc->AddOptionalKey(
"scorers",
"string",
255 "semi-colon seperated list of scorers, overrides the ini file",
277 string Orig = args[
"blastdb"].AsString();
280 ITERATE(vector<string>, DBIter, DBs) {
281 cerr << *DBIter << endl;
291 vector<string> Tokens;
292 NStr::Split(args[
"asn_cache"].AsString(),
",", Tokens);
294 ITERATE(vector<string>, CacheIter, Tokens) {
295 CFile CacheFile( *CacheIter +
"/asn_cache.idx" );
314 bool AllowDupes =
false;
315 if(RunRegistry.
Get(
"ngalign",
"allow_dupes") ==
"true")
317 if(args[
"dup"].
HasValue() && args[
"dup"].AsBoolean())
342 cerr <<
"Failed to create Run." << endl;
348 CurrAligns = NgAligner.
Align();
355 Alignments->
Set().insert(Alignments->
Set().end(),
356 CurrAligns->
Get().begin(),
357 CurrAligns->
Get().end());
363 if(!Alignments.
IsNull()) {
394 if(args[
"b"].
HasValue() && args[
"b"].AsBoolean())
403 cerr <<
"No alignments found." << endl;
434 const string& Category)
437 const string Type = RunRegistry->
Get(Category,
"type");
439 const string Source = RunRegistry->
Get(Category,
"source");
441 const string Mask = RunRegistry->
Get(Category,
"mask");
444 string NMer =
"/netmnt/vast01/gp/ThirdParty/WindowMasker/data/" + Mask;
446 0, 1, 1, 0, 0, 0, 0, 0, 0,
false, 0, 0, 0, 0,
"mean", 0,
false, 0,
false);
452 if(
Type ==
"seqidlist") {
455 if(Args[
"query"].
HasValue() && Source.empty() && Category ==
"query") {
456 const string& Id = Args[
"query"].AsString();
458 IdList->SetIdList().push_back(QueryId);
460 if(Args[
"subject"].
HasValue() && Source.empty() && Category ==
"subject") {
461 const string& Id = Args[
"subject"].AsString();
463 IdList->SetIdList().push_back(SubjectId);
465 if(Args[
"qidlist"].
HasValue() && Source.empty() && Category ==
"query") {
471 if(!
Line.empty() &&
Line[0] !=
'#') {
474 IdList->SetIdList().push_back(QueryId);
478 if(Args[
"sidlist"].
HasValue() && Source.empty() && Category ==
"subject") {
484 if(!
Line.empty() &&
Line[0] !=
'#') {
487 IdList->SetIdList().push_back(SubjectId);
498 if(!
Line.empty() &&
Line[0] !=
'#') {
508 if(IdList->SetIdList().empty() && !
m_LoadedIds.empty()) {
509 int BatchSize = Args[
"batch"].AsInteger();
520 IdList->SetSeqMasker(Masker);
524 else if(
Type ==
"seqloclist") {
527 if(Args[
"query"].
HasValue() && Source.empty() && Category ==
"query" ) {
528 string QueryString = Args[
"query"].AsString();
532 string Line = QueryString;
533 if(!
Line.empty() &&
Line[0] !=
'#') {
534 vector<string> Tokens;
538 Loc->SetInt().SetId().Set(Tokens[0]);
539 if(
Loc->GetInt().GetId().IsGi() &&
Loc->GetInt().GetId().GetGi() <
GI_CONST(50)) {
545 LocList->SetLocList().push_back(
Loc);
549 else if(Args[
"qloclist"].
HasValue() && Source.empty() && Category ==
"query" ) {
555 if(!
Line.empty() &&
Line[0] !=
'#') {
556 vector<string> Tokens;
560 Loc->SetInt().SetId().Set(Tokens[0]);
561 if(
Loc->GetInt().GetId().IsGi() &&
Loc->GetInt().GetId().GetGi() <
GI_CONST(50)) {
567 LocList->SetLocList().push_back(
Loc);
572 else if(Args[
"subject"].
HasValue() && Source.empty() && Category ==
"subject" ) {
573 string SubjectString = Args[
"subject"].AsString();
577 string Line = SubjectString;
578 if(!
Line.empty() &&
Line[0] !=
'#') {
579 vector<string> Tokens;
582 Loc->SetInt().SetId().Set(Tokens[0]);
583 if(
Loc->GetInt().GetId().IsGi() &&
Loc->GetInt().GetId().GetGi() <
GI_CONST(50)) {
589 LocList->SetLocList().push_back(
Loc);
594 else if(Args[
"sloclist"].
HasValue() && Source.empty() && Category ==
"subject" ) {
600 if(!
Line.empty() &&
Line[0] !=
'#') {
601 vector<string> Tokens;
604 Loc->SetInt().SetId().Set(Tokens[0]);
605 if(
Loc->GetInt().GetId().IsGi() &&
Loc->GetInt().GetId().GetGi() <
GI_CONST(50)) {
611 LocList->SetLocList().push_back(
Loc);
617 LocList->SetSeqMasker(Masker);
621 else if(
Type ==
"blastdb") {
622 string Orig = Args[
"blastdb"].AsString();
625 ITERATE(vector<string>, DBIter, DBs) {
628 BlastDb->SetSoftFiltering(Args[
"softfilter"].AsString());
634 else if(
Type ==
"fasta") {
636 FileName = RunRegistry->
Get(Category,
"fasta");
638 FileName = Args[
"fasta"].AsString();
641 Batch = Args[
"batch"].AsInteger();
649 for(
int i = 0;
i < Batch;
i++) {
658 else if(
Type ==
"splitseqidlist") {
662 const string& Id = Args[
"query"].AsString();
664 IdList->AddSeqId(QueryId);
687 if(!
Line.empty() &&
Line[0] !=
'#') {
697 cerr << __LINE__ << endl;
699 int BatchSize = Args[
"batch"].AsInteger();
703 cerr << __LINE__ << (*LoadedIdsIter)->AsFastaString() << endl;
711 if(IdList->
Empty()) {
712 ERR_POST(
Error <<
" Split Seq Id List is empty, maybe all gap? ");
716 IdList->SetSeqMasker(Masker);
720 else if(
Type ==
"splitseqloclist") {
723 if(Args[
"qloclist"].
HasValue() && Source.empty() && Category ==
"query" ) {
729 if(!
Line.empty() &&
Line[0] !=
'#') {
730 vector<string> Tokens;
733 if(Tokens.size() >= 3) {
734 Loc->SetInt().SetId().Set(Tokens[0]);
738 Loc->SetWhole().Set(Tokens[0]);
740 LocList->AddSeqLoc(
Loc);
745 if(Args[
"sloclist"].
HasValue() && Source.empty() && Category ==
"subject" ) {
751 if(!
Line.empty() &&
Line[0] !=
'#') {
752 vector<string> Tokens;
755 if(Tokens.size() >= 3) {
756 Loc->SetInt().SetId().Set(Tokens[0]);
760 Loc->SetWhole().Set(Tokens[0]);
762 LocList->AddSeqLoc(
Loc);
766 if(LocList->
Empty()) {
767 ERR_POST(
Error <<
" Split Seq Loc List is empty, maybe all gap? ");
771 LocList->SetSeqMasker(Masker);
781 const string& Category,
790 while(!FastaReader.
AtEOF()) {
798 LoadedIds.push_back(
Entry->GetSeq().GetId().front() );
800 cerr << __LINE__ <<
"\t" << LoadedIds.size() << endl;
810 size_t Pos = In.tellg();
822 cerr <<
"Read Failure" << endl;
835 vector< CRef< CSeq_entry > > SeqEntries;
841 SeqEntries =
Reader.GetResult();
844 cerr <<
"AgpRead Exception: " << e.
ReportAll() << endl;
850 LoadedIds.push_back( (*SeqEntryIter)->GetSeq().GetId().front() );
869 }
else if(Top->
IsSeq()) {
885 ScorerNames =
GetArgs()[
"scorers"].AsString();
887 ScorerNames = RunRegistry->
Get(
"scorers",
"names");
889 vector<string> Names;
892 ITERATE(vector<string>, NameIter, Names) {
894 if(*NameIter ==
"blast")
897 else if(*NameIter ==
"pctident")
899 else if(*NameIter ==
"pctcov")
901 else if(*NameIter ==
"comcomp")
903 else if(*NameIter ==
"expansion")
905 else if(*NameIter ==
"weighted") {
907 K = RunRegistry->
GetDouble(
"scorers",
"sw_cvg", 0.04);
910 else if(*NameIter ==
"hang")
912 else if(*NameIter ==
"overlap")
914 else if(*NameIter ==
"clip") {
916 K = RunRegistry->
GetDouble(
"scorers",
"sw_cvg", 0.04);
931 string OrigFilters = args[
"filters"].AsString();
932 vector<string> SplitFilters;
934 ITERATE(vector<string>, FilterIter, SplitFilters) {
935 const string FilterStr = *FilterIter;
941 cerr <<
"x_AddFilters" <<
" : " << Rank <<
" : " << FilterStr << endl;
946 string FilterNames = RunRegistry->
Get(
"filters",
"names");
947 vector<string> Names;
949 ITERATE(vector<string>, NameIter, Names) {
950 string FilterStr = RunRegistry->
Get(
"filters", *NameIter);
956 cerr <<
"x_AddFilters" <<
" : " << *NameIter <<
" : " << FilterStr << endl;
966 string FilterNames = RunRegistry->
Get(
"aligners",
"names");
967 vector<string> Names;
970 ITERATE(vector<string>, NameIter, Names) {
972 string Type = RunRegistry->
Get(*NameIter,
"type");
977 else if(
Type ==
"remote_blast")
979 else if(
Type ==
"merge")
983 else if(
Type ==
"inversion")
985 else if(
Type ==
"split")
987 else if(
Type ==
"file")
999 string Params = RunRegistry->
Get(Name,
"params");
1000 int Threshold = RunRegistry->
GetInt(Name,
"threshold", 0);
1001 string Filter = RunRegistry->
Get(Name,
"filter");
1002 bool UseNegatives = RunRegistry->
GetBool(Name,
"useneg",
true);
1008 Filter = Args[
"softfilter"].AsString();
1016 Blaster->SetSoftFiltering(
Filter);
1018 Blaster->SetUseNegativeGiList(UseNegatives);
1027 string Params = RunRegistry->
Get(Name,
"params");
1028 int Threshold = RunRegistry->
GetInt(Name,
"threshold", 0);
1047 int Threshold = RunRegistry->
GetInt(Name,
"threshold", 0);
1048 double Clip = RunRegistry->
GetDouble(Name,
"clip", 3.0);
1049 string ModeStr = RunRegistry->
Get(Name,
"mode");
1051 if(ModeStr ==
"cleanup")
1053 else if(ModeStr ==
"tree")
1070 int Threshold = RunRegistry->
GetInt(Name,
"threshold", 0);
1086 string FileNameStr = RunRegistry->
Get(Name,
"filename");
1087 cerr <<
"x_CreateFileLoadAligner : " << FileNameStr << endl;
1124 if(Found != GivenIds.
end())
1125 GivenIds.
erase(Found);
1129 Out << IdIter->AsString() << endl;
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eAgpVersion_1_1
AGP spec 1.1.
Data loader implementation that uses the blast databases.
vector< CRef< CSeq_id > > SeqIdList
This class is used to turn an AGP file into a vector of Seq-entry's.
@ fSetSeqGap
Found gaps will not be given Seq-data such as Type and Linkage.
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &db_path, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &dbname="nr", const EDbType dbtype=eUnknown, bool use_fixed_size_slices=true, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
@ eNucleotide
nucleotide database
@ eSkipUnsupportedAlignments
Base class for reading FASTA sequences.
TAlignResultsRef GenerateAlignments(objects::CScope &Scope, ISequenceSet *QuerySet, ISequenceSet *SubjectSet, TAlignResultsRef AccumResults)
CFileLoadAligner(const string &name)
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
@ eAlignCleanup
Use the older (CAlignCleanup) merge algorithm.
@ eTreeAlignMerger
Use the new (CTreeAlignMerger) merge algorithm.
void Init()
Initialize the application.
void x_AddAligners(CNgAligner &NgAligner, IRegistry *RunRegistry)
unique_ptr< CUnorderedSplitter > m_Splitter
list< CRef< CSeq_id > >::const_iterator LoadedIdsIter
void x_AddScorers(CNgAligner &NgAligner, IRegistry *RunRegistry)
CRef< CInversionMergeAligner > x_CreateInversionMergeAligner(IRegistry *RunRegistry, const string &Name)
CRef< ISequenceSet > x_CreateSequenceSet(IRegistry *RunRegistry, const string &Category)
list< CRef< CSeq_id > > m_LoadedIds
void x_RecurseSeqEntry(CRef< CSeq_entry > Top, list< CRef< CSeq_id > > &SeqIdList)
CRef< CSplitSeqAlignMerger > x_CreateSplitSeqMergeAligner(IRegistry *RunRegistry, const string &Name)
CRef< CMergeAligner > x_CreateMergeAligner(IRegistry *RunRegistry, const string &Name)
void x_LoadExternalSequences(IRegistry *RunRegistry, const string &Category, list< CRef< CSeq_id > > &LoadedIds)
CRef< CGC_Assembly > x_LoadAssembly(CNcbiIstream &In)
void x_AddFilters(CNgAligner &NgAligner, IRegistry *RunRegistry)
void x_PrintNoHitList(CNcbiOstream &Out, const CSeq_align_set &Alignments)
int Run()
Run the application.
CRef< CRemoteBlastAligner > x_CreateRemoteBlastAligner(IRegistry *RunRegistry, const string &Name)
bool x_CreateNgAlignRun(CNgAligner &NgAligner, IRegistry *RunRegistry)
CRef< CFileLoadAligner > x_CreateFileLoadAligner(IRegistry *RunRegistry, const string &Name)
CRef< CBlastAligner > x_CreateBlastAligner(IRegistry *RunRegistry, const string &Name)
void SetQuery(ISequenceSet *Set)
void SetSubject(ISequenceSet *Set)
void AddFilter(IAlignmentFilter *Filter)
void AddAligner(IAlignmentFactory *Aligner)
void AddScorer(IAlignmentScorer *Scorer)
Main interface to window based masker functionality.
Simple implementation of ILineReader for i(o)streams.
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
const_iterator end() const
Operators to edit gaps in sequences.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ eInputFile
Name of file (must exist and be readable)
@ eString
An arbitrary string.
@ eOutputFile
Name of file (must be writable)
@ eInteger
Convertible into an integer number (int or Int8)
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
@ eDS_Default
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
@ eDiag_Info
Informational message.
void Error(CExceptionArgs_Base &args)
void Warning(CExceptionArgs_Base &args)
string ReportAll(TDiagPostFlags flags=eDPF_Exception) const
Report all exceptions.
void Info(CExceptionArgs_Base &args)
virtual bool Exists(void) const
Check existence of file.
#define MSerial_AsnBinary
#define MSerial_AsnText
I/O stream manipulators.
virtual CRef< CSeq_entry > ReadOneSeq(ILineErrorListener *pMessageListener=nullptr)
Read a single effective sequence, which may turn out to be a segmented set.
bool AtEOF(void) const
Indicates (negatively) whether there is any more input.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
void Reset(void)
Reset reference object.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
virtual bool GetBool(const string &section, const string &name, bool default_value, TFlags flags=0, EErrAction err_action=eThrow) const
Get boolean value of specified parameter name.
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
virtual int GetInt(const string &section, const string &name, int default_value, TFlags flags=0, EErrAction err_action=eThrow) const
Get integer value of specified parameter name.
virtual double GetDouble(const string &section, const string &name, double default_value, TFlags flags=0, EErrAction err_action=eThrow) const
Get double value of specified parameter name.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Tdata & Set(void)
Assign a value to data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
const TSeq & GetSeq(void) const
Get the variant data.
const TSet & GetSet(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
bool IsSet(void) const
Check if variant Set is selected.
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
list< CRef< CSeq_entry > > TSeq_set
void SetData(TData &value)
Assign a value to Data data member.
const TId & GetId(void) const
Get the Id member data.
Lightweight interface for getting lines of data with minimal memory copying.
Magic spell ;-) needed for some weird compilers... very empiric.
GenericReader< UTF8< char >, UTF8< char >, CrtAllocator > Reader
Reader with UTF8 encoding and default allocator.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
#define GetProgramName
Avoid name clash with the NCBI C Toolkit.
void Filter(TTimeline &timeline, TServers &servers)
int main(int argc, char **argv)
void Out(T t, int w, CNcbiOstream &to=cout)
CRef< objects::CObjectManager > om