71 if (synonyms && !synonyms->empty()) {
72 for (
auto& it : *synonyms) {
97 else if (!bsh1 || !bsh2) {
100 else if (bsh1.GetBioseqLength() != bsh2.GetBioseqLength()) {
103 else if (bsh1.IsNucleotide() && bsh2.IsProtein()) {
106 else if (bsh1.IsProtein() && bsh2.IsNucleotide()) {
114 old_seqvec.
GetSeqData(0, bsh1.GetBioseqLength(), old_seq);
116 upd_seqvec.
GetSeqData(0, bsh2.GetBioseqLength(), upd_seq);
127 for (
auto& it : upd_ids) {
141 bool has_conflict =
false;
142 for (
auto& upd_id : bseq.
GetId()) {
143 for (
auto& old_id : seq_ids) {
159 vector<CRef<CSeq_id>> update_ids;
160 for (
auto& it : bseq.
GetId()) {
162 newid->
Assign(it.GetObject());
163 update_ids.push_back(newid);
166 string lclID_label, gbID_label, accID_label;
167 if (bseq.
GetId().size() == 1) {
172 if ((*upd_id)->IsLocal()) {
175 else if ((*upd_id)->IsGenbank()) {
178 else if ((*upd_id)->IsOther()) {
181 accID_label.push_back(
'.');
188 if (!gbID_label.empty()) {
191 else if (!lclID_label.empty()) {
194 else if (!accID_label.empty()) {
203 bseq.
SetId().push_back(newUpdate_Id);
233 unmatched.push_back(idh);
236 bool collide =
false;
246 if (matches.
find(old_idh) != matches.
end()) {
251 matches.emplace(old_idh, upd_idh);
257 unmatched.push_back(idh);
268 if (matches.
find(old_id_handle) != matches.
end())
273 for (
auto& id1 : oldBseq->
GetId()) {
274 for (
auto& id2 : upd_bseq.
GetId()) {
300 if (old_dot !=
NPOS) {
301 old_label = old_label.substr(0, old_dot);
303 if (upd_dot !=
NPOS) {
304 upd_label = upd_label.substr(0, upd_dot);
331 if ((*it)->IsSetLabel() && (*it)->GetLabel().IsStr()
333 && (*it)->IsSetData()
334 && (*it)->GetData().IsStr()) {
336 string id_str = (*it)->GetData().GetStr();
354 return vector<CConstRef<CSeq_align> >();
366 return vector<CConstRef<CSeq_align> >();
398 result_iter, results->Get()) {
400 assm_iter, result_iter->second->Get()) {
402 query_iter, assm_iter->second) {
419 void CGPipeAlignmentScorer::s_AddStandardAlignmentScores(
CScope& scope,
CSeq_align& align,
int flags)
454 vector<CConstRef<CSeq_align> > align_vector;
457 LOG_POST(
Error <<
"Both sequences should be in the same scope");
471 query->SetLocList().push_back(query_seqloc);
475 subject->SetLocList().push_back(subject_seqloc);
482 return reinterpret_cast<ICanceled*
>(
prog->user_data)->IsCanceled();
486 bool useHiWordAligner = (seqLength > 12000);
487 if (useHiWordAligner) {
489 opts->SetTraditionalBlastnDefaults();
490 blast::CBlastOptions& options = opts->SetOptions();
492 options.SetWordSize(1200);
493 options.SetEvalueThreshold(1e-6);
494 options.SetBestHitOverhang(0.1);
495 options.SetBestHitScoreEdge(0.1);
499 blastAligner->SetInterruptCallback(cb, canceled);
506 opts->SetTraditionalBlastnDefaults();
507 blast::CBlastOptions& options = opts->SetOptions();
509 options.SetWordSize(12);
510 options.SetEvalueThreshold(1e-6);
511 options.SetBestHitOverhang(0.1);
512 options.SetBestHitScoreEdge(0.1);
517 blastAligner->SetInterruptCallback(cb, canceled);
529 ng_aligner.
AddFilter(
new CQueryFilter(0,
"pct_identity_gapopen_only >= 99.5 AND pct_coverage >= 99"));
533 if (accept_atleast_one) {
539 if (align && align->
IsSet()) {
550 vector<CConstRef<CSeq_align> > align_vector;
571 align_vector.push_back(align);
578 vector<CConstRef<CSeq_align> > align_vector;
587 blast::CBlastProteinOptionsHandle prot_opts_handle;
588 prot_opts_handle.SetEvalueThreshold(1e-6);
589 prot_opts_handle.SetWordThreshold(100.0);
590 prot_opts_handle.Validate();
592 blast::CBl2Seq blaster(
query,
subject, prot_opts_handle);
595 if (!seqaligns.empty()) {
597 if ((*it)->IsSet()) {
605 catch (
const blast::CBlastException& e) {
617 return length_first >= length_sec;
Declares the CBlastProteinOptionsHandle class.
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
CSeq_entry * GetParentEntry(void) const
@ eSkipUnsupportedAlignments
void ScoreAlignments(TAlignResultsRef results, CScope &scope)
@ eTreeAlignMerger
Use the new (CTreeAlignMerger) merge algorithm.
void SetQuery(ISequenceSet *Set)
void SetSubject(ISequenceSet *Set)
void AddFilter(IAlignmentFilter *Filter)
void AddAligner(IAlignmentFactory *Aligner)
void AddScorer(IAlignmentScorer *Scorer)
int GetGapCount(const CSeq_align &align)
Compute the number of gaps in the alignment.
void AddScore(CScope &scope, CSeq_align &align, EScoreType score)
deprecated: use CSeq_align::EScoreType directly
Sequence update exception class.
@ eScore_PercentIdentity_GapOpeningOnly
@ eScore_PercentIdentity_Gapped
@ eScore_PercentIdentity_Ungapped
void SetNamedScore(const string &id, int score)
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
list< CRef< CSeq_annot > > TAnnot
CSeq_entry * GetParentEntry(void) const
Template class for iteration on objects of class C.
EObjectType GetObjectType() const
Interface for testing cancellation request in a long lasting operation.
const_iterator end() const
const_iterator find(const key_type &key) const
static void FindMatches(objects::CSeq_inst::EMol type, objects::CSeq_entry_Handle &oldSeq, objects::CSeq_entry &updEntry, TSeqIdHMap &matches, TSeqIdHVector &unmatched)
static bool CompareAlignments(const objects::CSeq_align &align_first, const objects::CSeq_align &align_sec)
Returns true if the first alignment is 'better' than the second one. Compares the Blast scores and in ...
static vector< CConstRef< objects::CSeq_align > > RunBlast2Seq(const objects::CBioseq_Handle &subject, const objects::CBioseq_Handle &query, bool accept_atleast_one, ICanceled *canceled=nullptr)
static bool HaveIdenticalResidues(const objects::CBioseq_Handle &bsh1, const objects::CBioseq_Handle &bsh2)
vector< objects::CSeq_id_Handle > TSeqIdHVector
static void FixCollidingIDs_Bioseq(objects::CBioseq &bseq, const objects::CBioseq::TId &seq_ids)
static objects::CSeq_id_Handle GetGoodSeqIdHandle(const objects::CBioseq_Handle &bsh)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
E_SIC
Compare return values.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
void Swap(CSeq_id_Handle &idh)
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
@ e_NO
different SeqId types-can't compare
@ e_YES
SeqIds compared, but are different.
@ eContent
Untagged human-readable accession or the like.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CConstRef< CSeq_id > GetLocalIdOrNull(void) const
bool IsNucleotide(void) const
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
CSeq_id_Handle GetAccessSeq_id_Handle(void) const
Get any CSeq_id_Handle handle that can be used to access this bioseq. Use GetSeq_id_Handle() if it's n...
TSeqPos GetBioseqLength(void) const
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle. Throws an exception if none is available.
bool IsProtein(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
CScope & GetScope(void) const
Get scope this handle belongs to.
CRef< CSeq_loc > GetRangeSeq_loc(TSeqPos start, TSeqPos stop, ENa_strand strand=eNa_strand_unknown) const
Return CSeq_loc referencing the given range and strand on the bioseq. If start == 0,...
CConstRef< CSynonymsSet > GetSynonyms(void) const
Get the bioseq's synonyms.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
TObjectType * GetPointerOrNull(void) THROWS_NONE
Get pointer value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
NCBI_NS_STD::string::size_type SIZE_TYPE
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static string & ToUpper(string &str)
Convert string to upper case – string& version.
const TData & GetData(void) const
Get the Data member data.
vector< CRef< CUser_field > > TData
void SetSegs(TSegs &value)
Assign a value to Segs data member.
bool IsSet(void) const
Check if a value has been assigned to data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
bool IsLocal(void) const
Check if variant Local is selected.
TClass GetClass(void) const
Get the Class member data.
const TSet & GetSet(void) const
Get the variant data.
bool IsSet(void) const
Check if variant Set is selected.
@ eClass_nuc_prot
nuc acid and coded proteins
TId & SetId(void)
Assign a value to Id data member.
void ResetId(void)
Reset Id data member.
const TInst & GetInst(void) const
Get the Inst member data.
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TId & GetId(void) const
Get the Id member data.
list< CRef< CSeq_id > > TId
TMol GetMol(void) const
Get the Mol member data.
EMol
molecule class in living organism
bool IsSetId(void) const
Equivalent identifiers. Check if a value has been assigned to Id data member.
@ e_User
user defined object
Uint1 Boolean
bool replacement for C
@ fScore_GapCount
add a 'gap_count' score
@ fScore_Identities
add BLAST-style 'num_ident' score
@ fScore_Default
default flags: everything
@ fScore_Mismatches
add a 'mismatch' score with a count of mismatches
@ fScore_PercentCoverage
add a score for percent coverage of query (sequence 0)
@ fScore_PercentIdentity
add scores for ungapped and gapped percent identity
const SNCBIPackedScoreMatrix NCBISM_Blosum62
static const char * kUpdateSuffix
static void s_FixCollidingIDs_Annot(CBioseq &bseq, CSeq_entry::TAnnot &annot, const vector< CRef< CSeq_id >> &upd_ids)
static CBioseq_Handle s_GetMatchingSequence(CSeq_inst::EMol type, CSeq_entry_Handle &oldSeq, const CBioseq &upd_bseq, bool &collide, const sequpd::TSeqIdHMap &matches)
static CConstRef< CSeq_id > s_GetOriginalId(const CBioseq_Handle &bsh)
static bool s_MatchSeqIds(CScope &scope, const CSeq_id &old_id, const CSeq_id &upd_id, bool &collide)
static vector< CConstRef< CSeq_align > > s_RunBlast2NASeq(const CBioseq_Handle &sh, const CBioseq_Handle &qh, bool accept_atleast_one, ICanceled *canceled)
static vector< CConstRef< CSeq_align > > s_RunBlast2NWSeq(const CBioseq_Handle &sh, const CBioseq_Handle &qh)
static vector< CConstRef< CSeq_align > > s_RunBlast2AASeq(const CBioseq_Handle &sh, const CBioseq_Handle &qh)
Progress monitoring structure.