NCBI C++ ToolKit
feature_item.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: feature_item.cpp 101909 2024-03-01 12:11:21Z stakhovv $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aaron Ucko, NCBI
27 * Mati Shomrat
28 * Maintainer: Frank Ludwig
29 *
30 * File Description:
31 * new (early 2003) flat-file generator -- representation of features
32 * (mainly of interest to implementors)
33 *
34 *
35 * WHEN EDITING THE LIST OF QUALIFIERS:
36 *
37 * - there is currently a lot of parallel logic for the FTable case
38 * (CFeatureItem::x_AddFTableQuals()) and the standard case
39 * (CFeatureItem::x_Add...Quals()). Make sure to edit both cases as
40 * appropriate.
41 * ===========================================================================
42 */
43 #include <ncbi_pch.hpp>
44 #include <corelib/ncbistd.hpp>
45 #include <serial/iterator.hpp>
46 #include <serial/enumvalues.hpp>
47 
48 #include <algorithm>
49 #include <sstream>
50 
51 #include <objects/seq/Bioseq.hpp>
53 #include <objects/seq/MolInfo.hpp>
89 
90 #include <objmgr/scope.hpp>
92 #include <objmgr/seqdesc_ci.hpp>
93 #include <objmgr/seq_vector.hpp>
94 #include <objmgr/util/sequence.hpp>
95 #include <objmgr/util/feature.hpp>
96 #include <objmgr/util/weight.hpp>
98 
99 #include <util/static_set.hpp>
100 #include <util/static_map.hpp>
101 #include <util/sequtil/sequtil.hpp>
103 
104 #include <algorithm>
111 #include <objmgr/util/objutil.hpp>
112 #include "inst_info_map.hpp"
113 
114 // On Mac OS X 10.3, FixMath.h defines ff as a one-argument macro(!)
115 #ifdef ff
116 # undef ff
117 #endif
118 
121 USING_SCOPE(sequence);
122 
124 {
125 public:
126  bool operator() ( const CConstRef<CFlatGoQVal> &obj1, const CConstRef<CFlatGoQVal> &obj2 )
127  {
128  const CFlatGoQVal *qval1 = obj1.GetNonNullPointer();
129  const CFlatGoQVal *qval2 = obj2.GetNonNullPointer();
130 
131  // sort by text string
132  const string &str1 = qval1->GetTextString();
133  const string &str2 = qval2->GetTextString();
134 
135  int textComparison = 0;
136 
137  // This whole paragraph should eventually be replaced with a mere NStr::CompareNocase stored into textComparison
138  // We can't just use NStr::CompareNocase, because that compares using tolower, whereas
139  // we must compare with toupper to maintain compatibility with C.
140  SIZE_TYPE pos = 0;
141  const SIZE_TYPE min_length = min( str1.length(), str2.length() );
142  for( ; pos < min_length; ++pos ) {
143  textComparison = toupper( str1[pos] ) - toupper( str2[pos] );
144  if( textComparison != 0 ) {
145  break;
146  }
147  }
148  if( 0 == textComparison ) {
149  // if we reached the end, compare via length (shorter first)
150  textComparison = str1.length() - str2.length();
151  }
152 
153  // compare by text, if possible
154  if( textComparison < 0 ) {
155  return true;
156  } else if( textComparison > 0 ) {
157  return false;
158  }
159 
160  // if text is tied, then sort by pubmed id, if any
161  int pmid1 = qval1->GetPubmedId();
162  int pmid2 = qval2->GetPubmedId();
163 
164  if( 0 == pmid1 ) {
165  return false;
166  } else if( 0 == pmid2 ) {
167  return true;
168  } else {
169  return pmid1 < pmid2;
170  }
171  }
172 };
173 
174 // -- static functions
175 
176 static bool s_ValidId(const CSeq_id& id)
177 {
178  return id.IsGenbank() || id.IsEmbl() || id.IsDdbj() ||
179  id.IsOther() || id.IsPatent() ||
180  id.IsTpg() || id.IsTpe() || id.IsTpd() ||
181  id.IsGpipe();
182 }
183 
184 static
186  const string &s1, const string &s2,
187  NStr::ECase use_case )
188 {
189  if( s1.empty() || s2.empty() ) {
190  return s1.empty() && s2.empty();
191  }
192 
193  // set length to disregard final period, if any
194  size_t s1_len = s1.length();
195  if( s1[s1_len-1] == '.' ) {
196  --s1_len;
197  }
198  size_t s2_len = s2.length();
199  if( s2[s2_len-1] == '.' ) {
200  --s2_len;
201  }
202 
203  if( s1_len != s2_len ) {
204  return false;
205  }
206 
207  // NStr::Equal does not have exactly the function I want,
208  // so I have to make my own.
209  for( size_t ii = 0; ii < s1_len ; ++ii ) {
210  const char ch1 = ( use_case == NStr::eNocase ? toupper(s1[ii]) : s1[ii] );
211  const char ch2 = ( use_case == NStr::eNocase ? toupper(s2[ii]) : s2[ii] );
212  if( ch1 != ch2 ) {
213  return false;
214  }
215  }
216  return true;
217 }
218 
219 static bool s_CheckQuals_cdregion(const CMappedFeat& feat,
220  const CSeq_loc& loc,
222 {
223  if ( !ctx.Config().CheckCDSProductId() ) {
224  return true;
225  }
226 
227  CScope& scope = ctx.GetScope();
228 
229  // non-pseudo CDS must have /product
230  bool pseudo = feat.IsSetPseudo() && feat.GetPseudo() ;
231  if ( !pseudo && !ctx.IsEMBL() && !ctx.IsDDBJ() ) {
232  const CGene_ref* grp = feat.GetGeneXref();
233  if (! grp) {
234  CConstRef<CSeq_feat> gene = GetOverlappingGene(loc, scope);
235  if (gene) {
236  pseudo = gene->IsSetPseudo() && gene->GetPseudo();
237  if ( !pseudo ) {
238  grp = &(gene->GetData().GetGene());
239  }
240  }
241  }
242  if (! pseudo && grp) {
243  pseudo = grp->GetPseudo();
244  }
245  }
246 
247  bool just_stop = false;
248  const CSeq_loc& Loc = feat.GetLocation();
249  if ( Loc.IsPartialStart(eExtreme_Biological) && !Loc.IsPartialStop(eExtreme_Biological) ) {
250  if ( GetLength(Loc, &scope) <= 5 ) {
251  just_stop = true;
252  }
253  }
254 
255  if ( pseudo || just_stop ) {
256  return true;
257  }
258 
259  // make sure the product has a valid accession
260  if (feat.IsSetProduct()) {
262  try {
263  id.Reset(&(GetId(feat.GetProduct(), &scope)));
264  } catch ( CException& ) {
265  id.Reset();
266  }
267  if (id) {
268  if ((id->IsGi() && id->GetGi() > ZERO_GI) || id->IsLocal()) {
269  CBioseq_Handle prod = scope.GetBioseqHandleFromTSE(*id, ctx.GetHandle());
270  if (prod) {
271  ITERATE (CBioseq_Handle::TId, it, prod.GetId()) {
272  if (s_ValidId(*it->GetSeqId())) {
273  CConstRef<CTextseq_id> tsip(it->GetSeqId()->GetTextseq_Id());
274  if (tsip && tsip->IsSetAccession() &&
275  IsValidAccession(tsip->GetAccession())) {
276  return true;
277  }
278  }
279  }
280  } else if (id->IsGi() && id->GetGi() > ZERO_GI) {
281  // RELEASE_MODE requires that /protein_id is an accession
282  if (ctx.Config().IsModeRelease()) {
283  try {
284  if (IsValidAccession(GetAccessionForGi(id->GetGi(), scope))) {
285  return true;
286  }
287  } catch (CException&) {
288  }
289  }
290  }
291  } else if (s_ValidId(*id)) {
293  if (tsip && tsip->IsSetAccession() &&
294  IsValidAccession(tsip->GetAccession())) {
295  return true;
296  }
297  }
298  }
299  } else { // no product
300  if (feat.IsSetExcept() && feat.GetExcept() &&
301  feat.IsSetExcept_text() ) {
302  if (NStr::Find(feat.GetExcept_text(),
303  "rearrangement required for product") != NPOS) {
304  return true;
305  }
306  }
307  }
308 
309  return false;
310 }
311 
312 
313 
314 static bool s_HasPub(const CMappedFeat& feat, CBioseqContext& ctx)
315 {
316  ITERATE(CBioseqContext::TReferences, it, ctx.GetReferences()) {
317  if ((*it)->Matches(feat.GetCit())) {
318  return true;
319  }
320  }
321 
322  return false;
323 }
324 
325 
327 {
328  // check for /compare
329  if (!NStr::IsBlank(feat.GetNamedQual("compare"))) {
330  return true;
331  }
332 
333  // check for /citation
334  if (feat.IsSetCit()) {
335  return s_HasPub(feat, ctx);
336  }
337 
338  return false;
339 }
340 
341 
342 // conflict requires /citation or /compare
344 {
345  // RefSeq allows conflict with accession in comment instead of sfp->cit
346  if (ctx.IsRefSeq() &&
347  feat.IsSetComment() && !NStr::IsBlank(feat.GetComment())) {
348  return true;
349  }
350 
351  return s_HasCompareOrCitation(feat, ctx);
352 }
353 
354 // old_sequence requires /citation or /compare
356 {
357  return s_HasCompareOrCitation(feat, ctx);
358 }
359 
360 
361 static bool s_CheckQuals_gene(const CMappedFeat& feat)
362 {
363  // gene requires /gene or /locus_tag, but desc or syn can be mapped to /gene
364  const CSeqFeatData::TGene& gene = feat.GetData().GetGene();
365  if ( (gene.IsSetLocus() && !gene.GetLocus().empty()) ||
366  (gene.IsSetLocus_tag() && !gene.GetLocus_tag().empty()) ||
367  (gene.IsSetDesc() && !gene.GetDesc().empty()) ||
368  (!gene.GetSyn().empty() && !gene.GetSyn().front().empty()) ) {
369  return true;
370  }
371 
372  return false;
373 }
374 
375 
376 static bool s_CheckQuals_bind(const CMappedFeat& feat)
377 {
378  // protein_bind or misc_binding require eFQ_bound_moiety
379  return !NStr::IsBlank(feat.GetNamedQual("bound_moiety"));
380 }
381 
382 
383 static bool s_CheckQuals_mod_base(const CMappedFeat& feat)
384 {
385  // modified_base requires eFQ_mod_base
386  return !NStr::IsBlank(feat.GetNamedQual("mod_base"));
387 }
388 
389 
390 static bool s_CheckQuals_gap(const CMappedFeat& feat)
391 {
392  // gap feature must have /estimated_length qual
393  return !feat.GetNamedQual("estimated_length").empty();
394 }
395 
396 static bool s_CheckQuals_assembly_gap(const CMappedFeat& feat)
397 {
398  // assembly_gap feature must have /estimated_length qual
399  // and /gap_type
400  return ! feat.GetNamedQual("estimated_length").empty() &&
401  ! feat.GetNamedQual("gap_type").empty();
402 }
403 
404 
405 static bool s_CheckQuals_ncRNA(const CMappedFeat& feat)
406 {
407  if( !NStr::IsBlank(feat.GetNamedQual("ncRNA_class")) ) {
408  return true;
409  }
410 
411  // Look at this mess; if only we could use sequence_macros.hpp
412  if( feat.GetData().GetRna().IsSetExt() &&
413  feat.GetData().GetRna().GetExt().IsGen() &&
414  feat.GetData().GetRna().GetExt().GetGen().IsSetClass() &&
415  !NStr::IsBlank(feat.GetData().GetRna().GetExt().GetGen().GetClass()) )
416  {
417  return true;
418  }
419 
420  return false;
421 }
422 
423 
424 static bool s_CheckQuals_regulatory(const CMappedFeat& feat)
425 {
426  // regulatory feature must have /regulatory_class qual
427  return ! feat.GetNamedQual("regulatory_class").empty();
428 }
429 
430 
431 static bool s_CheckMandatoryQuals(const CMappedFeat& feat,
432  const CSeq_loc& loc,
434 {
435  switch ( feat.GetData().GetSubtype() ) {
437  {
438  return s_CheckQuals_cdregion(feat, loc, ctx);
439  }
441  {
442  return s_CheckQuals_conflict(feat, ctx);
443  }
445  {
446  return s_CheckQuals_old_seq(feat, ctx);
447  }
449  {
450  return s_CheckQuals_gene(feat);
451  }
454  {
455  return s_CheckQuals_bind(feat);
456  }
458  {
459  return s_CheckQuals_mod_base(feat);
460  }
462  {
463  return s_CheckQuals_gap(feat);
464  }
466  {
467  return s_CheckQuals_assembly_gap(feat);
468  }
470  {
471  return s_CheckQuals_ncRNA(feat);
472  }
474  {
475  return s_CheckQuals_regulatory(feat);
476  }
477  default:
478  break;
479  }
480 
481  return true;
482 }
483 
484 static bool s_SkipFeature(const CMappedFeat& feat,
485  const CSeq_loc& loc,
487 {
489  CSeqFeatData::ESubtype subtype = feat.GetData().GetSubtype();
490 
491  if ( subtype == CSeqFeatData::eSubtype_pub ||
492  /* subtype == CSeqFeatData::eSubtype_non_std_residue || */
493  subtype == CSeqFeatData::eSubtype_biosrc ||
494  subtype == CSeqFeatData::eSubtype_rsite ||
495  subtype == CSeqFeatData::eSubtype_seq ) {
496  return true;
497  }
498 
499  const CFlatFileConfig& cfg = ctx.Config();
500 
501  // check feature customization flags
502  if ( cfg.ValidateFeatures() &&
503  (subtype == CSeqFeatData::eSubtype_bad ||
504  subtype == CSeqFeatData::eSubtype_virion) ) {
505  return true;
506  }
507 
508  if ( cfg.ValidateFeatures() && type == CSeqFeatData::e_Imp ) {
509  switch ( subtype ) {
510  default:
511  break;
517  return true;
518  }
519  }
520 
521  if ( ctx.IsNuc() && subtype == CSeqFeatData::eSubtype_het ) {
522  return true;
523  }
524 
525  if ( cfg.HideImpFeatures() && type == CSeqFeatData::e_Imp ) {
526  return true;
527  }
528 
529  if ( cfg.HideMiscFeatures() ) {
530  if ( type == CSeqFeatData::e_Site ||
535  subtype == CSeqFeatData::eSubtype_preprotein ) {
536  return true;
537  }
538  }
539 
540  if ( cfg.HideExonFeatures() && subtype == CSeqFeatData::eSubtype_exon ) {
541  return true;
542  }
543 
544  if ( cfg.IsPolicyGenomes() && subtype == CSeqFeatData::eSubtype_exon &&
545  (ctx.GetBiomol() == CMolInfo::eBiomol_mRNA || ctx.GetBiomol() == CMolInfo::eBiomol_transcribed_RNA) ) {
546  return true;
547  }
548 
549  if ( cfg.HideIntronFeatures() && subtype == CSeqFeatData::eSubtype_intron ) {
550  return true;
551  }
552 
553  if ( cfg.HideRemoteImpFeatures() && type == CSeqFeatData::e_Imp ) {
554  if ( subtype == CSeqFeatData::eSubtype_variation ||
555  subtype == CSeqFeatData::eSubtype_exon ||
556  subtype == CSeqFeatData::eSubtype_intron ||
558  return true;
559  }
560  }
561 
563  const CSeq_feat::TDbxref& dbxref = feat.GetDbxref();
564  ITERATE (CSeq_feat::TDbxref, it, dbxref) {
565  const CDbtag& dbt = **it;
566  if ( dbt.IsSetDb() && !dbt.GetDb().empty() && dbt.GetDb() == "dbSNP") {
567  return true;
568  }
569  }
570  }
571 
572  if ( cfg.GeneRNACDSFeatures() ) {
573  if ( type != CSeqFeatData::e_Gene &&
576  return true;
577  }
578  }
579 
580  // skip genes in DDBJ format
581  if ( cfg.IsFormatDDBJ() && type == CSeqFeatData::e_Gene ) {
582  return true;
583  }
584 
585  // if RELEASE mode, make sure we have all info to create mandatory quals.
586  if ( cfg.NeedRequiredQuals() ) {
587  return !s_CheckMandatoryQuals(feat, loc, ctx);
588  }
589 
590  return false;
591 }
592 
594 public:
595  bool operator()( const char ch )
596  {
597  return( ! isdigit(ch) && ch != '.' && ch != '-' );
598  }
599 };
600 
// Checks whether ec_number has the shape of a legal EC number.
//
// As implemented, the string must contain exactly three periods (four
// fields).  Each field must hold either digits or a single ambiguity
// marker ('-' or 'n'), never both, and once an ambiguity marker has been
// seen no digit may follow anywhere in the string.  Typical accepted
// patterns are:
//     num.num.num.num
//     num.num.num.-
//     num.num.-.-
//     num.-.-.-
//     -.-.-.-
// ("n" may be used instead of "-").
// Special case: an 'n' in the fourth field that comes before any digit of
// that field and is immediately followed by a digit (e.g. "1.2.3.n4") is
// not treated as an ambiguity marker.
// Characters other than digits, '-', 'n' and '.' are ignored, not rejected.
//
// @param ec_number  the candidate EC number
// @return           true if the string passes the checks above
static bool s_IsLegalECNumber(const string& ec_number)
{
    if ( ec_number.empty() ) {
        return false;
    }

    bool is_ambig = false;   // an ambiguity marker has been seen
    int numperiods = 0;      // periods seen so far
    int numdigits = 0;       // digits in the current field
    int numdashes = 0;       // ambiguity markers in the current field

    const string::size_type len = ec_number.length();
    for ( string::size_type pos = 0; pos < len; ++pos ) {
        const char ch = ec_number[pos];
        // isdigit() must not be given a negative plain char (undefined
        // behavior), so always go through unsigned char.
        if ( isdigit(static_cast<unsigned char>(ch)) ) {
            ++numdigits;
            if ( is_ambig ) {
                return false;
            }
        } else if ( ch == '-' ) {
            ++numdashes;
            is_ambig = true;
        } else if ( ch == 'n' ) {
            const bool digit_follows =
                pos + 1 < len &&
                isdigit(static_cast<unsigned char>(ec_number[pos + 1]));
            // allow/ignore n in first position of fourth number to not mean
            // ambiguous, if followed by digit
            if ( !(numperiods == 3 && numdigits == 0 && digit_follows) ) {
                ++numdashes;
                is_ambig = true;
            }
        } else if ( ch == '.' ) {
            ++numperiods;
            // the field just closed must be digits-only or one marker
            if ( numdigits > 0 && numdashes > 0 ) return false;
            if ( numdigits == 0 && numdashes == 0 ) return false;
            if ( numdashes > 1 ) return false;
            numdigits = 0;
            numdashes = 0;
        }
    }

    // the last field is validated here since no period closes it
    if ( numperiods == 3 ) {
        if ( numdigits > 0 && numdashes > 0 ) return false;
        if ( numdigits > 0 || numdashes == 1 ) return true;
    }

    return false;
}
650 
651 
652 static const string& s_GetBondName(CSeqFeatData::TBond bond)
653 {
654  static const string kOther = "unclassified";
655  return (bond == CSeqFeatData::eBond_other) ? kOther :
656  CSeqFeatData::ENUM_METHOD_NAME(EBond)()->FindName(bond, true);
657 }
658 
659 static void s_QualVectorToNote(
660  const CFlatFeature::TQuals& qualVector,
661  bool noRedundancy,
662  string& note,
663  string& punctuation,
664  bool& addPeriod)
665 {
666  // is there at least one note which is more than blank or a period?
667  bool hasSubstantiveNote = false;
668  // store this so we can chop off the extra stuff we added if there was no note of substance
669  const string::size_type original_length = note.length();
670 
671  string prefix;
672  ITERATE (CFlatFeature::TQuals, it, qualVector) {
673  const string& qual = (*it)->GetValue();
674 
675  prefix.erase();
676  if ( !note.empty() ) {
677  prefix = punctuation;
678  const string& next_prefix = (*it)->GetPrefix();
679  if (!NStr::EndsWith(prefix, '\n') ) {
680  prefix += next_prefix;
681  }
682  }
683 
684  if( !qual.empty() && qual != "." ) {
685  hasSubstantiveNote = true;
686  }
687 
688  // A qual may declare that it be shown even if redundant and override the
689  // given noRedundancy variable
690  const bool noRedundancyThisIteration =
691  ( 0 != ( (*it)->GetFlags() & CFormatQual::fFlags_showEvenIfRedund ) ? false : noRedundancy );
692  JoinString(note, prefix, qual, noRedundancyThisIteration );
693 
694  addPeriod = (*it)->GetAddPeriod();
695  punctuation = (*it)->GetSuffix();
696  }
697 
698  // if there was no meaningful note, we clear it
699  if( ! hasSubstantiveNote ) {
700  note.resize( original_length );
701  }
702 }
703 
704 
705 static void s_NoteFinalize(
706  bool addPeriod,
707  string& noteStr,
708  CFlatFeature& flatFeature,
709  ETildeStyle style = eTilde_newline ) {
710 
711  if (!noteStr.empty()) {
712  if (addPeriod && !NStr::EndsWith(noteStr, ".")) {
713 
714  AddPeriod(noteStr);
715  }
716  // Policy change: expand tilde on both descriptors and features
717  ExpandTildes(noteStr, style);
718  TrimSpacesAndJunkFromEnds( noteStr, true );
719 
720  CRef<CFormatQual> note(new CFormatQual("note", noteStr));
721  flatFeature.SetQuals().push_back(note);
722  }
723 }
724 
725 static int s_GetOverlap(const CMappedFeat& feat )
726 {
727  if (feat) {
728  int total_length = 0;
729  ITERATE( CSeq_loc, loc_iter, feat.GetLocation() ) {
730  total_length += loc_iter.GetRange().GetLength();
731  }
732  return total_length;
733  }
734  return 0;
735 }
736 
737 
738 ///
739 /// The best protein feature is defined as the one that has the most overlap
740 /// with the given DNA.
741 /// If there is a tie between two protein features in overlap then the one
742 /// with the lesser processing status is declared the winner.
743 ///
745 {
747  sel.SetLimitTSE(seq.GetTSE_Handle());
748 
749  CMappedFeat best;
751  int best_overlap = 0;
752 
753  for (CFeat_CI it(seq, sel); it; ++it) {
754 
755  if ( !best ) {
756 
757  best = *it;
758  best_processed = it->GetData().GetProt().GetProcessed();
759  best_overlap = s_GetOverlap(best);
760 
761  } else {
762 
763  int current_overlap = s_GetOverlap(*it);
764  CProt_ref::TProcessed current_processed = it->GetData().GetProt().GetProcessed();
765 
766  if ( best_overlap < current_overlap ) {
767 
768  best_overlap = current_overlap;
769  best_processed = current_processed;
770  best = *it;
771 
772  } else if ( (best_overlap == current_overlap) && (best_processed > current_processed) ) {
773 
774  best_processed = current_processed;
775  best = *it;
776  }
777  }
778  }
779  return best;
780 }
781 
782 // -- FeatureHeader
783 
785 {
786  x_GatherInfo(ctx);
787 }
788 
790 {
791  return eItem_FeatHeader;
792 }
793 
795 {
796  if ( ctx.Config().IsFormatFTable() ) {
797  m_Id.Reset(ctx.GetPrimaryId());
798  }
799 }
800 
801 static bool s_CheckFuzz(const CInt_fuzz& fuzz)
802 {
803  return !(fuzz.IsLim() && fuzz.GetLim() == CInt_fuzz::eLim_unk);
804 }
805 
806 static bool s_LocIsFuzz(const CMappedFeat& feat, const CSeq_loc& loc)
807 {
808  if ( feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_imp &&
809  feat.GetData().IsImp() ) { // unmappable impfeats
810  const CSeqFeatData::TImp& imp = feat.GetData().GetImp();
811  if ( imp.IsSetLoc() ) {
812  const string& imploc = imp.GetLoc();
813  if ( imploc.find('<') != NPOS || imploc.find('>') != NPOS ) {
814  return true;
815  }
816  }
817  } else { // any regular feature test location for fuzz
818  for ( CSeq_loc_CI it(loc, CSeq_loc_CI::eEmpty_Allow); it; ++it ) {
819  const CSeq_loc& l = it.GetEmbeddingSeq_loc();
820  switch ( l.Which() ) {
821  case CSeq_loc::e_Pnt:
822  {{
823  if ( l.GetPnt().IsSetFuzz() ) {
824  if ( s_CheckFuzz(l.GetPnt().GetFuzz()) ) {
825  return true;
826  }
827  }
828  break;
829  }}
831  {{
832  if ( l.GetPacked_pnt().IsSetFuzz() ) {
833  if ( s_CheckFuzz(l.GetPacked_pnt().GetFuzz()) ) {
834  return true;
835  }
836  }
837  break;
838  }}
839  case CSeq_loc::e_Int:
840  {{
841  bool fuzz = false;
842  if ( l.GetInt().IsSetFuzz_from() ) {
843  fuzz = s_CheckFuzz(l.GetInt().GetFuzz_from());
844  }
845  if ( !fuzz && l.GetInt().IsSetFuzz_to() ) {
846  fuzz = s_CheckFuzz(l.GetInt().GetFuzz_to());
847  }
848  if ( fuzz ) {
849  return true;
850  }
851  break;
852  }}
854  {{
855  if ( l.GetPacked_int().IsPartialStart(eExtreme_Biological)
856  || l.GetPacked_int().IsPartialStop(eExtreme_Biological) ) {
857  return true;
858  }
859  break;
860  }}
861  case CSeq_loc::e_Null:
862  {{
863  return true;
864  }}
865  default:
866  break;
867  }
868  }
869  }
870 
871  return false;
872 }
873 
// Appends "name" immediately followed by "str" to output, preceded by ", "
// when output already has content.  Nothing is appended if str is empty.
static void s_AddPcrPrimersQualsAppend( string &output, const string &name, const string &str )
{
    if( str.empty() ) {
        return;
    }
    if( ! output.empty() ) {
        output.append( ", " );
    }
    output.append( name + str );
}
883 
884 // This splits a string that's comma-separated with parens at start and end
885 // (or, string might just contain a single string, so no splitting is needed,
886 // in which case the output_vec will be of size 1)
887 static void s_SplitCommaSeparatedStringInParens( vector<string> &output_vec, const string &string_to_split )
888 {
889  // nothing to do since no input
890  if( string_to_split.empty() ) {
891  return;
892  }
893 
894  // no splitting required
895  if( string_to_split[0] != '(' ) {
896  output_vec.push_back( string_to_split );
897  return;
898  }
899 
900  // if ends with closing paren, chop that off.
901  // ( It's actually a data error if we DON'T end with a ')', but we continue anyway, since
902  // we want to do the best we can with the data we get. )
903  size_t amount_to_chop_off_end = 0;
904  if( string_to_split[string_to_split.length() - 1] == ')' ) {
905  amount_to_chop_off_end = 1;
906  }
907 
908  NStr::Split( string_to_split.substr( 1, string_to_split.length() - amount_to_chop_off_end - 1), ",", output_vec, 0 );
909 }
910 
911 static const char* const sc_ValidPseudoGene[] = {
912  "allelic",
913  "processed",
914  "unitary",
915  "unknown",
916  "unprocessed"
917 };
920 
921 static bool s_IsValidPseudoGene( objects::CFlatFileConfig::TMode mode, const string& text)
922 {
923  switch(mode)
924  {
925  case objects::CFlatFileConfig::eMode_Release:
926  case objects::CFlatFileConfig::eMode_Entrez:
927  return sc_ValidPseudoGeneText.find(text.c_str()) != sc_ValidPseudoGeneText.end();
928  default:
929  return ! text.empty();
930  }
931 }
932 
933 static const char* const sc_ValidExceptionText[] = {
934  "annotated by transcript or proteomic data",
935  "rearrangement required for product",
936  "reasons given in citation",
937  "RNA editing"
938 };
941 
942 static bool s_IsValidExceptionText(const string& text)
943 {
944  return sc_LegalExceptText.find(text.c_str()) != sc_LegalExceptText.end();
945 }
946 
947 
948 static const char* const sc_ValidRefSeqExceptionText[] = {
949  "adjusted for low-quality genome",
950  "alternative processing",
951  "alternative start codon",
952  "artificial frameshift",
953  "dicistronic gene",
954  "mismatches in transcription",
955  "mismatches in translation",
956  "modified codon recognition",
957  "nonconsensus splice site",
958  "transcribed product replaced",
959  "transcribed pseudogene",
960  "translated product replaced",
961  "unclassified transcription discrepancy",
962  "unclassified translation discrepancy",
963  "unextendable partial coding region"
964 };
967 
968 static bool s_IsValidRefSeqExceptionText(const string& text)
969 {
970  return sc_LegalRefSeqExceptText.find(text.c_str()) != sc_LegalRefSeqExceptText.end();
971 }
972 
973 // -- FeatureItemBase
974 
976 (const CMappedFeat& feat,
979  const CSeq_loc* loc,
980  bool suppressAccession) :
981  CFlatItem(&ctx), m_Feat(feat), m_Feat_Tree(ftree), m_Loc(loc ? loc :
982  (feat ? &feat.GetLocation() : nullptr)),
983  m_SuppressAccession(suppressAccession)
984 {
985  if (m_Feat) {
987 
989  const CSeq_annot_Handle& ah = feat.GetAnnot();
991  if (! seh) {
992  x_SetExternal();
993  }
994  }
995 }
996 
998 {
1000  *new CFlatSeqLoc(GetLoc(), *GetContext(), CFlatSeqLoc::eType_location, false, false, this->IsSuppressAccession()),
1001  m_Feat));
1002  if ( ff ) {
1003  x_FormatQuals(*ff);
1004  }
1005  return ff;
1006 }
1007 
1008 
1009 // -- CFeatureItem
1010 
1011 string CFeatureItem::GetKey(void) const
1012 {
1014 
1017 
1018  if (GetContext()->IsProt()) { // protein
1019  if ( IsMappedFromProt() && type == CSeqFeatData::e_Prot ) {
1020  if ( subtype == CSeqFeatData::eSubtype_preprotein ||
1025  return "Precursor";
1026  }
1027  }
1028  switch ( subtype ) {
1030  return "Region";
1032  return "Bond";
1034  return "Site";
1035  default:
1036  break;
1037  }
1038  } else { // nucleotide
1039  switch ( subtype ) {
1040 
1042  return "ncRNA";
1043 
1045  return "tmRNA";
1046 
1048  if ( !ctx.IsRefSeq() ) {
1049  return "misc_feature";
1050  }
1051  break;
1052 
1057  return "misc_feature";
1058 
1059  default:
1060  break;
1061  }
1062  }
1063 
1064  // deal with unmappable impfeats
1065  if (subtype == CSeqFeatData::eSubtype_imp && type == CSeqFeatData::e_Imp) {
1066  const CSeqFeatData::TImp& imp = m_Feat.GetData().GetImp();
1067  if ( imp.IsSetKey() ) {
1068  return imp.GetKey();
1069  }
1070  }
1071 
1072  if (type == CSeqFeatData::e_Imp) {
1073  switch ( subtype ) {
1086  return "regulatory";
1087  default:
1088  break;
1089  }
1090  }
1091 
1092  return CFeatureItemBase::GetKey();
1093 }
1094 
1095 
1096 // constructor from CSeq_feat
1098 (const CMappedFeat& feat,
1101  const CSeq_loc* loc,
1102  EMapped mapped,
1103  bool suppressAccession,
1104  CConstRef<CFeatureItem> parentFeatureItem) :
1105  CFeatureItemBase(feat, ctx, ftree, loc, suppressAccession), m_Mapped(mapped)
1106 {
1107  x_GatherInfoWithParent(ctx, parentFeatureItem);
1108 }
1109 
1111 {
1112  return eItem_Feature;
1113 }
1114 
1116 {
1117  if ( s_SkipFeature(GetFeat(), GetLoc(), ctx) ) {
1118  x_SetSkip();
1119  return;
1120  }
1122  x_AddQuals(ctx, parentFeatureItem );
1123 }
1124 
1125 // ----------------------------------------------------------------------------
1127  CBioseqContext& ctx )
1128 //
1129 // Note: /partial has been deprecated since DEC-2001. Current policy is to
1130 // suppress /partial in entrez and release modes and let it stand in gbench and
1131 // dump modes
1132 // ----------------------------------------------------------------------------
1133 {
1134  if ( !ctx.Config().HideUnclassPartial() ) {
1135  if ( !IsMappedFromCDNA() || !ctx.IsProt() ) {
1136  if ( m_Feat.IsSetPartial() && m_Feat.GetPartial() ) {
1137  if ( eSeqlocPartial_Complete == sequence::SeqLocPartialCheck( GetLoc(), &ctx.GetScope() ) &&
1138  !s_LocIsFuzz( m_Feat, GetLoc() ) )
1139  {
1140  x_AddQual( eFQ_partial, new CFlatBoolQVal( true ) );
1141  }
1142  }
1143  }
1144  }
1145 }
1146 
1147 // ----------------------------------------------------------------------------
1150  CSeqFeatData::ESubtype subtype )
1151 // ----------------------------------------------------------------------------
1152 {
1153  if ( subtype == CSeqFeatData::eSubtype_operon ||
1154  subtype == CSeqFeatData::eSubtype_gap ) {
1155  return;
1156  }
1157 
1158  // bail if this type of object is not allowed to carry an operon
1160  return;
1161  }
1162 
1163  const CGene_ref* gene_ref = m_Feat.GetGeneXref();
1164  if (! gene_ref || ! gene_ref->IsSuppressed()) {
1165  const CSeq_loc& operon_loc = ( ctx.IsProt() || !IsMapped() ) ?
1166  m_Feat.GetLocation() : GetLoc();
1167  CConstRef<CSeq_feat> operon
1168  = GetOverlappingOperon( operon_loc, ctx.GetScope() );
1169  if ( operon ) {
1170  const string& operon_name = operon->GetNamedQual( "operon" );
1171  if ( !operon_name.empty() ) {
1172  x_AddQual(eFQ_operon, new CFlatStringQVal(operon_name));
1173  }
1174  }
1175  }
1176 }
1177 
1178 // ----------------------------------------------------------------------------
1181  CSeqFeatData::ESubtype subtype )
1182 // ----------------------------------------------------------------------------
1183 {
1184  _ASSERT( m_Feat.GetData().IsImp() );
1185 
1186  switch ( subtype ) {
1189  break;
1192  break;
1194  x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("CAAT_signal"));
1195  break;
1198  break;
1200  x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("minus_35_signal"));
1201  break;
1203  x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("minus_10_signal"));
1204  break;
1206  x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("GC_signal"));
1207  break;
1209  x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("ribosome_binding_site"));
1210  break;
1212  x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("polyA_signal_sequence"));
1213  break;
1215  x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("attenuator"));
1216  break;
1218  x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("terminator"));
1219  break;
1222  break;
1223  default:
1224  break;
1225  }
1226 }
1227 
1228 // ----------------------------------------------------------------------------
1232  CSeqFeatData::ESubtype subtype,
1233  bool pseudo )
1234 // ----------------------------------------------------------------------------
1235 {
1236  if ( !pseudo ||
1238  subtype == CSeqFeatData::eSubtype_centromere ||
1239  subtype == CSeqFeatData::eSubtype_telomere )
1240  {
1241  return;
1242  }
1243 
1244  if (ctx.Config().DropIllegalQuals() &&
1246  {
1247  switch (subtype) {
1278  return;
1279  default:
1280  break;
1281  }
1282  }
1283  x_AddQual( eFQ_pseudo, new CFlatBoolQVal( true ) );
1284 }
1285 
1286 // ----------------------------------------------------------------------------
1288 // ----------------------------------------------------------------------------
1289 {
1290  string precursor_comment;
1291  // set precursor_comment, if needed.
1292  // It's set from the feature's product's best protein's comment
1293  if( GetContext()->IsProt() && IsMappedFromProt() && m_Feat.IsSetProduct() ) {
1294  const CSeq_id* prod_id = m_Feat.GetProduct().GetId();
1295  if (prod_id) {
1296  CBioseq_Handle prod_bioseq = GetContext()->GetScope().GetBioseqHandle(*prod_id);
1297  if( prod_bioseq ) {
1298  CMappedFeat best_prot_feat = s_GetBestProtFeature( prod_bioseq );
1299  if( best_prot_feat && best_prot_feat.IsSetComment() ) {
1300  precursor_comment = best_prot_feat.GetComment() ;
1301  }
1302  }
1303  }
1304  }
1305 
1306  if (m_Feat.IsSetComment()) {
1307  string comment = m_Feat.GetComment();
1308 
1309  TrimSpacesAndJunkFromEnds( comment, true );
1310  if ( ! comment.empty() && comment != "~" && comment != precursor_comment) {
1311  bool bAddPeriod = RemovePeriodFromEnd( comment, true );
1312  ConvertQuotes(comment);
1313  CRef<CFlatStringQVal> seqfeat_note( new CFlatStringQVal( comment ) );
1314 // if ( bAddPeriod && ! x_GetStringQual(eFQ_prot_desc ) ) {
1315  // careful! Period must be removed if we have a valid eFQ_prot_desc
1316  // Examples to test some cases: AB001488, M96268
1317  if ( bAddPeriod ) {
1318  seqfeat_note->SetAddPeriod();
1319  }
1320  x_AddQual( eFQ_seqfeat_note, seqfeat_note );
1321  }
1322  }
1323 
1324  /// also scan the annot to see if there is a comment there, if required
1325  if( ! ctx.ShowAnnotCommentAsCOMMENT() ) {
1329  if ((*it)->IsComment()) {
1330  const string & comment = (*it)->GetComment();
1331  // certain comments require special handling
1332  const static string ktRNAscanSE = "tRNA features were annotated by tRNAscan-SE";
1333  if( NStr::StartsWith(comment, ktRNAscanSE, NStr::eNocase) /* && ! x_HasMethodtRNAscanSE() */ )
1334  {
1336  // don't propagate tRNAscan-SE comments to irrelevant features
1337  continue;
1338  }
1339  }
1340  string comm = comment;
1341  TrimSpacesAndJunkFromEnds( comm, false );
1342  RemovePeriodFromEnd( comm, true );
1344  new CFlatStringQVal(comm));
1345  }
1346  }
1347  }
1348  }
1349 
1350 }
1351 
1352 // ----------------------------------------------------------------------------
1354  CBioseqContext& ctx )
1355 // ----------------------------------------------------------------------------
1356 {
1357  if ( ! m_Feat.IsSetExp_ev() ) {
1358  return;
1359  }
1360 
1361  string value;
1363  if ( ! x_GetGbValue( "experiment", value ) && ! x_GetGbValue( "inference", value ) ) {
1365  }
1366  }
1367  else if ( ! x_GetGbValue( "inference", value ) ) {
1369  }
1370 }
1371 
1372 static
1374  const CSeqFeatData& data )
1375 {
1376  switch( data.GetSubtype() ) {
1389  return true;
1390  default:
1391  return false;
1392  }
1393 }
1394 
1395 // ----------------------------------------------------------------------------
1397  CBioseqContext& ctx )
1398 //
1399 // Add any existing exception qualifiers.
1400 // Note: These include /ribosomal_slippage and /trans-splicing as special
1401 // cases. Also, some exceptions are listed as notes.
1402 // ----------------------------------------------------------------------------
1403 {
1404  const CSeqFeatData& data = m_Feat.GetData();
1405 
1406  string raw_exception;
1407 
1408  if ( ( m_Feat.IsSetExcept() && m_Feat.GetExcept() ) &&
1409  (m_Feat.IsSetExcept_text() && !m_Feat.GetExcept_text().empty()) ) {
1410  raw_exception = m_Feat.GetExcept_text();
1411  }
1412  if ( raw_exception == "" ) {
1413  return;
1414  }
1415 
1416  const bool bIsRefseq = ctx.IsRefSeq();
1417  // const bool bIsRelaxed = ( ! cfg.DropIllegalQuals() );
1418  const bool bIsRelaxed = ((! ctx.Config().IsModeRelease()) && (! ctx.Config().IsModeEntrez()));
1419 
1420  list<string> exceptions;
1421  NStr::Split( raw_exception, ",", exceptions, NStr::fSplit_Tokenize );
1422 
1423  list<string> output_exceptions;
1424  list<string> output_notes;
1425  ITERATE( list<string>, it, exceptions ) {
1426  string cur = NStr::TruncateSpaces( *it );
1427  if( cur.empty() ) {
1428  continue;
1429  }
1430 
1431  //
1432  // If exceptions are legal then it depends on the exception. Some are
1433  // turned into their own custom qualifiers. Others are allowed to stand
1434  // as exceptions, while others are turned into notes.
1435  //
1436  if ( s_IsValidExceptionText( cur ) ) {
1437  if( bIsRefseq || bIsRelaxed || data.IsCdregion() ) {
1438  output_exceptions.push_back( cur );
1439  } else {
1440  output_notes.push_back( cur );
1441  }
1442  continue;
1443  }
1444  if ( s_IsValidRefSeqExceptionText( cur ) ) {
1445  if( bIsRefseq || bIsRelaxed ) {
1446  output_exceptions.push_back( cur );
1447  } else {
1448  output_notes.push_back( cur );
1449  }
1450  continue;
1451  }
1452  if ( NStr::EqualNocase(cur, "ribosomal slippage") ) {
1453  if( data.IsCdregion() ) {
1455  } else {
1456  output_notes.push_back( cur );
1457  }
1458  continue;
1459  }
1460  if ( NStr::EqualNocase(cur, "trans-splicing") ) {
1462  x_AddQual( eFQ_trans_splicing, new CFlatBoolQVal( true ) );
1463  } else {
1464  output_notes.push_back( cur );
1465  }
1466  continue;
1467  }
1468  if ( NStr::EqualNocase(cur, "circular RNA") ) {
1469  if( data.IsRna() || data.IsCdregion() ) {
1470  x_AddQual( eFQ_circular_RNA, new CFlatBoolQVal( true ) );
1471  } else {
1472  output_notes.push_back( cur );
1473  }
1474  continue;
1475  }
1476  const bool is_cds_or_mrna = ( data.IsCdregion() ||
1477  data.GetSubtype() == CSeqFeatData::eSubtype_mRNA );
1478  if( NStr::EqualNocase(cur, "artificial location") ) {
1479  if( is_cds_or_mrna ) {
1481  } else {
1482  output_notes.push_back( cur );
1483  }
1484  continue;
1485  }
1486  if( NStr::EqualNocase(cur, "heterogeneous population sequenced") ||
1487  NStr::EqualNocase(cur, "low-quality sequence region") )
1488  {
1489  if( is_cds_or_mrna ) {
1491  } else {
1492  output_notes.push_back( cur );
1493  }
1494  continue;
1495  }
1496  else {
1497  if ( bIsRelaxed ) {
1498  output_exceptions.push_back( cur );
1499  }
1500  else {
1501  output_notes.push_back( cur );
1502  }
1503  }
1504  }
1505  if ( ! output_exceptions.empty() ) {
1506  string exception = NStr::Join( output_exceptions, ", " );
1507  x_AddQual(eFQ_exception, new CFlatStringQVal( exception ) );
1508  }
1509  if ( ! output_notes.empty() ) {
1510  string note = NStr::Join( output_notes, ", " );
1512  }
1513 }
1514 
1515 // ----------------------------------------------------------------------------
1517  CConstRef<CSeq_feat> gene_feat )
1518 // ----------------------------------------------------------------------------
1519 {
1520  if ( ! gene_feat || ! gene_feat->IsSetComment() ) {
1521  return;
1522  }
1524  gene_feat->GetComment() ) );
1525 }
1526 
1527 // ----------------------------------------------------------------------------
1529  const CGene_ref* gene_ref,
1530  const CConstRef<CSeq_feat>& gene_feat )
1531 // ----------------------------------------------------------------------------
1532 {
1533  const CSeqFeatData& data = m_Feat.GetData();
1534  CSeqFeatData::E_Choice type = data.Which();
1535 
1537  if ( ! gene_ref && gene_feat ) {
1538  gene_ref = &gene_feat->GetData().GetGene();
1539  if (gene_ref && gene_ref->IsSetDb()) {
1540  x_AddQual(
1541  eFQ_gene_xref, new CFlatXrefQVal( gene_ref->GetDb() ) );
1542  } else if ( gene_feat->IsSetDbxref() ) {
1543  x_AddQual(
1544  eFQ_gene_xref, new CFlatXrefQVal( gene_feat->GetDbxref() ) );
1545  }
1546  }
1547  }
1548 }
1549 
1550 // ----------------------------------------------------------------------------
1552  const CBioseqContext& ctx,
1553  CConstRef<CSeq_feat> gene_feat )
1554 //
1555 // For non-gene features, add /old_locus_tag, if one exists somewhere.
1556 // ----------------------------------------------------------------------------
1557 {
1558  if ( ! gene_feat ) {
1559  return;
1560  }
1561 
1562  if ( ctx.IsProt() ) {
1563  // skip if GenPept format and not gene or CDS
1564  const CSeqFeatData& data = m_Feat.GetData();
1565  CSeqFeatData::ESubtype subtype = data.GetSubtype();
1566  if (subtype != CSeqFeatData::eSubtype_gene && subtype != CSeqFeatData::eSubtype_cdregion) {
1567  return;
1568  }
1569  }
1570 
1571  const CSeq_feat::TQual& quals = gene_feat->GetQual();
1572  for ( size_t iPos = 0; iPos < quals.size(); ++iPos ) {
1573  CRef< CGb_qual > qual = quals[ iPos ];
1574  if ( ! qual->IsSetQual() || ! qual->IsSetVal() ) {
1575  continue;
1576  }
1577  if ( qual->GetQual() == "old_locus_tag" ) {
1580  }
1581  }
1582 }
1583 
1584 // ----------------------------------------------------------------------------
1586  const CGene_ref* gene_ref,
1587  const CSeq_feat* gene_feat ) const
1588 // ----------------------------------------------------------------------------
1589 {
1590  const CSeqFeatData& data = m_Feat.GetData();
1591  CSeqFeatData::E_Choice type = data.Which();
1592  CSeqFeatData::ESubtype subtype = data.GetSubtype();
1593 
1594  bool pseudo = m_Feat.IsSetPseudo() ? m_Feat.GetPseudo() : false;
1595  if ( type != CSeqFeatData::e_Gene &&
1596  subtype != CSeqFeatData::eSubtype_operon &&
1597  subtype != CSeqFeatData::eSubtype_gap )
1598  {
1599  if ( gene_feat && gene_feat->IsSetPseudo() && gene_feat->GetPseudo() ) {
1600  return true;
1601  const CGene_ref* altref = &gene_feat->GetData().GetGene();
1602  if ( altref && altref->IsSetPseudo() && altref->GetPseudo() ) {
1603  return true;
1604  }
1605  }
1606  if ( gene_ref && gene_ref->IsSetPseudo() && gene_ref->GetPseudo() ) {
1607  return true;
1608  }
1609  }
1610  if ( type == CSeqFeatData::e_Gene ) {
1611  if ( data.GetGene().IsSetPseudo() && data.GetGene().GetPseudo() ) {
1612  return true;
1613  }
1614  }
1615  if ( type == CSeqFeatData::e_Rna ) {
1616  if ( data.GetRna().IsSetPseudo() && data.GetRna().GetPseudo() ) {
1617  return true;
1618  }
1619  }
1620  return pseudo;
1621 }
1622 
1625  CConstRef<CFeatureItem> parentFeatureItem )
1626 {
1627  CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
1628  if (! idx) return;
1629  CBioseq_Handle hdl = ctx.GetHandle();
1630  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
1631  if (! bsx) return;
1632 
1633  const CSeqFeatData& data = m_Feat.GetData();
1634  CSeqFeatData::E_Choice type = data.Which();
1635  CSeqFeatData::ESubtype subtype = data.GetSubtype();
1636 
1637  bool is_not_genbank = false;
1638  {{
1639  ITERATE( CBioseq::TId, id_iter, ctx.GetBioseqIds() ) {
1640  const CSeq_id& id = **id_iter;
1641 
1642  switch ( id.Which() ) {
1643  case CSeq_id_Base::e_Embl:
1644  case CSeq_id_Base::e_Ddbj:
1645  case CSeq_id_Base::e_Tpe:
1646  case CSeq_id_Base::e_Tpd:
1647  is_not_genbank = true;
1648  break;
1649  default:
1650  // do nothing
1651  break;
1652  }
1653  }
1654  }}
1655 
1656  const CGene_ref* gene_ref = nullptr;
1657  CConstRef<CSeq_feat> gene_feat;
1658  const CGene_ref* feat_gene_xref = nullptr;
1659  feat_gene_xref = m_Feat.GetGeneXref();
1660  if (! feat_gene_xref && parentFeatureItem) {
1661  feat_gene_xref = parentFeatureItem->GetFeat().GetGeneXref();
1662  }
1663  bool suppressed = false;
1664 
1665  const bool gene_forbidden_if_genbank =
1667  subtype == CSeqFeatData::eSubtype_centromere ||
1668  subtype == CSeqFeatData::eSubtype_telomere );
1669 
1670  if ( type == CSeqFeatData::e_Gene ) {
1671  } else if (subtype != CSeqFeatData::eSubtype_operon &&
1672  subtype != CSeqFeatData::eSubtype_gap &&
1673  (is_not_genbank || ! gene_forbidden_if_genbank)) {
1674  if (feat_gene_xref) {
1675  if (feat_gene_xref->IsSuppressed()) {
1676  suppressed = true;
1677  }
1678  }
1679 
1680  if (feat_gene_xref && ! suppressed) {
1681  // RW-943
1682  // gene_ref = feat_gene_xref;
1684  if (! ft) {
1685  if (parentFeatureItem) {
1686  // RW-985 fix for RW-943 dropping xrefs on sig_peptide and mat_peptide
1687  ft = bsx->GetFeatIndex (parentFeatureItem->GetFeat());
1688  } else {
1689  // SF-3276 BAM94483 coded_by CDS was not getting xref'd gene
1690  ft = bsx->GetFeatureForProduct();
1691  }
1692  }
1693  if (ft) {
1694  CRef<CFeatureIndex> fsx = ft->GetBestGene();
1695  if (fsx) {
1696  const CMappedFeat mf = fsx->GetMappedFeat();
1697  if (mf) {
1698  const CGene_ref* gr = nullptr;
1700  gf = &(mf.GetMappedFeature());
1701  gr = &(mf.GetData().GetGene());
1702  if (gr) {
1703  if (feat_gene_xref->IsSetLocus_tag() && gr->IsSetLocus_tag()) {
1704  if (feat_gene_xref->GetLocus_tag() == gr->GetLocus_tag()) {
1705  gene_feat = &(mf.GetMappedFeature());
1706  gene_ref = &(mf.GetData().GetGene());
1707  } else {
1708  // RW-985
1709  gene_ref = feat_gene_xref;
1710  }
1711  } else if (feat_gene_xref->IsSetLocus() && gr->IsSetLocus()) {
1712  if (feat_gene_xref->GetLocus() == gr->GetLocus()) {
1713  gene_feat = &(mf.GetMappedFeature());
1714  gene_ref = &(mf.GetData().GetGene());
1715  } else {
1716  // RW-985
1717  gene_ref = feat_gene_xref;
1718  }
1719  } else {
1720  // SF-3822 - map locus in xref to desc in gene
1721  gene_ref = feat_gene_xref;
1722  }
1723  }
1724  }
1725  } else {
1726  // RW-943
1727  gene_ref = feat_gene_xref;
1728  }
1729  } else if ( feat_gene_xref && (! suppressed) && subtype == CSeqFeatData::eSubtype_cdregion ) {
1730  // CAI12201 coded_by CDS on far embl record
1731  gene_ref = feat_gene_xref;
1732  }
1733  } else if ((! feat_gene_xref || ! suppressed) &&
1736  bool is_mapped = false;
1737  if (parentFeatureItem) {
1738  ft = bsx->GetFeatIndex (parentFeatureItem->GetFeat());
1739  if (ft) {
1740  if (subtype == CSeqFeatData::eSubtype_preprotein ||
1745  try {
1746  if ( m_Feat.IsSetXref() ) {
1747  feat_gene_xref = m_Feat.GetGeneXref();
1748  if ( feat_gene_xref ) {
1749  gene_ref = feat_gene_xref;
1750  is_mapped = true;
1751  }
1752  }
1753  if (! is_mapped) {
1754  CRef<CFeatureIndex> fsx = ft->GetBestGene();
1755  if (fsx) {
1756  const CMappedFeat mf = fsx->GetMappedFeat();
1757  if (mf) {
1758  gene_feat = &(mf.GetMappedFeature());
1759  gene_ref = &(mf.GetData().GetGene());
1760  is_mapped = true;
1761  }
1762  }
1763  }
1764  if (! is_mapped) {
1765  // e.g., check sig_peptide for gene overlapping parent CDS
1766  CSeq_feat_Handle parent_feat_handle;
1767  parent_feat_handle = parentFeatureItem->GetFeat();
1769  gene_feat, parent_feat_handle );
1770  is_mapped = true;
1771  }
1772  } catch (CException&) {}
1773  }
1774  }
1775  } else {
1776  ft = bsx->GetFeatIndex (m_Feat);
1777  if (! ft) {
1778  ft = bsx->GetFeatureForProduct();
1779  if (! ft) {
1780  // RW-1646
1781  CBioseq_Handle hdl = ctx.GetHandle();
1782  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
1783  const CRef<CSeqMasterIndex>& midx = idx->GetMasterIndex();
1784  CRef<feature::CFeatTree> ftree = midx->GetFeatTree();
1785  ftree->AddGenesForFeat(m_Feat, ctx.GetAnnotSelector());
1786  try {
1787  const CMappedFeat mf = ftree->GetBestGene(m_Feat);
1788  if (mf) {
1789  gene_feat = &(mf.GetMappedFeature());
1790  gene_ref = &(mf.GetData().GetGene());
1791  }
1792  } catch (CException&) {}
1793  }
1794  }
1795  }
1796  if (ft && (! is_mapped)) {
1797  CRef<CFeatureIndex> fsx = ft->GetBestGene();
1798  if (fsx) {
1799  const CMappedFeat mf = fsx->GetMappedFeat();
1800  if (mf) {
1801  gene_feat = &(mf.GetMappedFeature());
1802  gene_ref = &(mf.GetData().GetGene());
1803  }
1804  } else if (feat_gene_xref) {
1805  // last resort, e.g., MH013512 after first nuc-prot set
1806  gene_ref = feat_gene_xref;
1807  }
1808  }
1809  }
1810  }
1811 
1812  bool pseudo = x_GetPseudo(gene_ref, gene_feat );
1813  if ( ctx.IsEMBL() || ctx.IsDDBJ() ) {
1815  pseudo = false;
1816  }
1817  if ( type == CSeqFeatData::e_Prot ) {
1818  pseudo = false;
1819  }
1820  }
1821 
1822  //
1823  // Collect qualifiers that are specific to a single or just a few feature
1824  // types:
1825  //
1826  switch ( type ) {
1828  x_AddQualsCdregionIdx(m_Feat, ctx, pseudo);
1829  break;
1830  case CSeqFeatData::e_Rna:
1831  x_AddQualsRna(m_Feat, ctx, pseudo);
1832  break;
1833  case CSeqFeatData::e_Prot:
1834  x_AddQualsProt(ctx, pseudo);
1835  break;
1837  x_AddQualsRegion( ctx );
1838  break;
1839  case CSeqFeatData::e_Site:
1840  x_AddQualsSite( ctx );
1841  break;
1842  case CSeqFeatData::e_Bond:
1843  x_AddQualsBond( ctx );
1844  break;
1847  break;
1849  x_AddQualsNonStd( ctx );
1850  break;
1851  case CSeqFeatData::e_Het:
1852  x_AddQualsHet( ctx );
1853  break;
1856  break;
1857  default:
1858  break;
1859  }
1860 
1861  //
1862  // Collect qualifiers that are common to most feature types:
1863  //
1864  x_AddQualPartial( ctx );
1865  x_AddQualDbXref( ctx );
1866  x_AddQualExt();
1867  x_AddQualExpInv( ctx );
1870  x_AddQualNote( gene_feat );
1871  x_AddQualOldLocusTag( ctx, gene_feat );
1872  x_AddQualDb( gene_ref );
1873  x_AddQualGeneXref( gene_ref, gene_feat );
1874  if (bsx->HasOperon()) {
1875  x_AddQualOperon( ctx, subtype );
1876  }
1877  x_AddQualsGene( ctx, gene_ref, gene_feat, gene_ref ? false : gene_feat.NotEmpty() );
1878 
1879  x_AddQualPseudo( ctx, type, subtype, pseudo );
1880  x_AddQualsGb( ctx );
1881 
1882  // dynamic mapping of old features to regulatory with regulatory_class qualifier
1883  if ( type == CSeqFeatData::e_Imp ) {
1884  x_AddQualsRegulatoryClass ( ctx, subtype );
1885  }
1886 
1888 
1889  // cleanup (drop illegal quals, duplicate information etc.)
1890  x_CleanQuals( gene_ref );
1891 
1892 
1893 }
1894 
1895 // ----------------------------------------------------------------------------
1898  CConstRef<CFeatureItem> parentFeatureItem )
1899 //
1900 // Add the various qualifiers to this feature. Top level function.
1901 // ----------------------------------------------------------------------------
1902 {
1903 // /**fl**/
1904  // leaving this here since it's so useful for debugging purposes.
1905  //21822,22172
1906  /* if(
1907  (GetLoc().GetStart(eExtreme_Biological) == 21821 &&
1908  GetLoc().GetStop(eExtreme_Biological) == 22171) ||
1909  (GetLoc().GetStop(eExtreme_Biological) == 21821 &&
1910  GetLoc().GetStart(eExtreme_Biological) == 22171)
1911  ) {
1912  cerr << ""; // a do-nothing statement in case we forget to comment it out
1913  } */
1914 // /**fl**/
1915 
1916  if ( ctx.Config().IsFormatFTable() ) {
1917  x_AddFTableQuals( ctx );
1918  return;
1919  }
1920 
1921  if ( ctx.UsingSeqEntryIndex() ) {
1922  x_AddQualsIdx(ctx, parentFeatureItem);
1923  return;
1924  }
1925 
1926  // SQD-4444 : pass annot selector from the context structure
1927  m_Feat_Tree->AddGenesForFeat(m_Feat, ctx.GetAnnotSelector());
1928 
1929  //
1930  // Collect/Compute data that will be shared between several qualifier
1931  // collectors:
1932  //
1933  const CSeqFeatData& data = m_Feat.GetData();
1934  CSeqFeatData::E_Choice type = data.Which();
1935  CSeqFeatData::ESubtype subtype = data.GetSubtype();
1936 // /**fl**/>>
1937 // if ( subtype == CSeqFeatData::eSubtype_sig_peptide_aa ||
1938 // subtype == CSeqFeatData::eSubtype_sig_peptide )
1939 // {
1940 // cerr << "Break" << endl;
1941 // }
1942 // <</**fl**/
1943 
1944  // check if this is some kind of Genbank record (some of the logic may be a little different in that case)
1945  bool is_not_genbank = false;
1946  {{
1947  ITERATE( CBioseq::TId, id_iter, ctx.GetBioseqIds() ) {
1948  const CSeq_id& id = **id_iter;
1949 
1950  switch ( id.Which() ) {
1951  case CSeq_id_Base::e_Embl:
1952  case CSeq_id_Base::e_Ddbj:
1953  case CSeq_id_Base::e_Tpe:
1954  case CSeq_id_Base::e_Tpd:
1955  is_not_genbank = true;
1956  break;
1957  default:
1958  // do nothing
1959  break;
1960  }
1961  }
1962  }}
1963 
1964  const CGene_ref* gene_ref = nullptr;
1965  CConstRef<CSeq_feat> gene_feat;
1966  const CGene_ref* feat_gene_xref = m_Feat.GetGeneXref();
1967  bool suppressed = false;
1968 
1969  const bool gene_forbidden_if_genbank =
1971  subtype == CSeqFeatData::eSubtype_centromere ||
1972  subtype == CSeqFeatData::eSubtype_telomere );
1973 
1974  if ( type == CSeqFeatData::e_Gene ) {
1975  } else if (subtype != CSeqFeatData::eSubtype_operon &&
1976  subtype != CSeqFeatData::eSubtype_gap &&
1977  (is_not_genbank || ! gene_forbidden_if_genbank)) {
1978  if (feat_gene_xref) {
1979  if (feat_gene_xref->IsSuppressed()) {
1980  suppressed = true;
1981  }
1982  }
1983  if (feat_gene_xref && ! suppressed &&
1984  ! CGeneFinder::ResolveGeneXref(feat_gene_xref, ctx.GetTopLevelEntry())) {
1985  gene_ref = feat_gene_xref;
1986  } else if ((! feat_gene_xref || ! suppressed) &&
1988 
1989  bool is_mapped = false;
1990  try {
1991  CMappedFeat mapped_gene = ctx.GetFeatTree().GetBestGene(m_Feat);
1992  if (mapped_gene) {
1993  gene_feat = mapped_gene.GetOriginalSeq_feat();
1994  gene_ref = &gene_feat->GetData().GetGene();
1995  is_mapped = true;
1996  }
1997  } catch (CException&) {}
1998  if (! is_mapped) {
1999  try {
2000  CMappedFeat mapped_gene = m_Feat_Tree->GetBestGene(m_Feat);
2001  if (mapped_gene) {
2002  gene_feat = mapped_gene.GetOriginalSeq_feat();
2003  gene_ref = &gene_feat->GetData().GetGene();
2004  is_mapped = true;
2005  }
2006  } catch (CException&) {}
2007  }
2008  if (! is_mapped) {
2009  try {
2010  // e.g., check sig_peptide for gene overlapping parent CDS
2011  CSeq_feat_Handle parent_feat_handle;
2012  if( parentFeatureItem ) {
2013  parent_feat_handle = parentFeatureItem->GetFeat();
2015  gene_feat, parent_feat_handle );
2016  }
2017  } catch (CException&) {}
2018  }
2019  }
2020  }
2021 
2022  bool pseudo = x_GetPseudo(gene_ref, gene_feat );
2023 
2024  //
2025  // Collect qualifiers that are specific to a single or just a few feature
2026  // types:
2027  //
2028  switch ( type ) {
2030  x_AddQualsCdregion(m_Feat, ctx, pseudo);
2031  break;
2032  case CSeqFeatData::e_Rna:
2033  x_AddQualsRna(m_Feat, ctx, pseudo);
2034  break;
2035  case CSeqFeatData::e_Prot:
2036  x_AddQualsProt(ctx, pseudo);
2037  break;
2039  x_AddQualsRegion( ctx );
2040  break;
2041  case CSeqFeatData::e_Site:
2042  x_AddQualsSite( ctx );
2043  break;
2044  case CSeqFeatData::e_Bond:
2045  x_AddQualsBond( ctx );
2046  break;
2049  break;
2051  x_AddQualsNonStd( ctx );
2052  break;
2053  case CSeqFeatData::e_Het:
2054  x_AddQualsHet( ctx );
2055  break;
2058  break;
2059  default:
2060  break;
2061  }
2062 
2063  //
2064  // Collect qualifiers that are common to most feature types:
2065  //
2066  x_AddQualPartial( ctx );
2067  x_AddQualDbXref( ctx );
2068  x_AddQualExt();
2069  x_AddQualExpInv( ctx );
2072  x_AddQualNote( gene_feat );
2073  x_AddQualOldLocusTag( ctx, gene_feat );
2074  x_AddQualDb( gene_ref );
2075  x_AddQualGeneXref( gene_ref, gene_feat );
2076  x_AddQualOperon( ctx, subtype );
2077  x_AddQualsGene( ctx, gene_ref, gene_feat, gene_ref ? false : gene_feat.NotEmpty() );
2078 
2079  x_AddQualPseudo( ctx, type, subtype, pseudo );
2080  x_AddQualsGb( ctx );
2081 
2082  // dynamic mapping of old features to regulatory with regulatory_class qualifier
2083  if ( type == CSeqFeatData::e_Imp ) {
2084  x_AddQualsRegulatoryClass ( ctx, subtype );
2085  }
2086 
2088 
2089  // cleanup (drop illegal quals, duplicate information etc.)
2090  x_CleanQuals( gene_ref );
2091 }
2092 
2093 
/// tRNA product names indexed by (one-letter amino-acid code - 64), which is
/// how s_AaName() addresses the table: slot 1 is 'A', slot 26 is 'Z'.
/// Slot 27 is the entry s_AaName() selects for '*'.  Slot 0 corresponds to
/// code 64 ('@') and is never handed out by s_AaName().
static const string s_TrnaList[] = {
    //  0 ..  3 :  '@'  'A'  'B'  'C'
    "tRNA-Gap",   "tRNA-Ala",   "tRNA-Asx",   "tRNA-Cys",
    //  4 ..  7 :  'D'  'E'  'F'  'G'
    "tRNA-Asp",   "tRNA-Glu",   "tRNA-Phe",   "tRNA-Gly",
    //  8 .. 11 :  'H'  'I'  'J'  'K'
    "tRNA-His",   "tRNA-Ile",   "tRNA-Xle",   "tRNA-Lys",
    // 12 .. 15 :  'L'  'M'  'N'  'O'
    "tRNA-Leu",   "tRNA-Met",   "tRNA-Asn",   "tRNA-Pyl",
    // 16 .. 19 :  'P'  'Q'  'R'  'S'
    "tRNA-Pro",   "tRNA-Gln",   "tRNA-Arg",   "tRNA-Ser",
    // 20 .. 23 :  'T'  'U'  'V'  'W'
    "tRNA-Thr",   "tRNA-Sec",   "tRNA-Val",   "tRNA-Trp",
    // 24 .. 27 :  'X'  'Y'  'Z'  and the '*' slot
    "tRNA-OTHER", "tRNA-Tyr",   "tRNA-Glx",   "tRNA-TERM"
};
2124 
2125 
2126 static const string& s_AaName(int aa)
2127 {
2128  int idx = 255;
2129 
2130  if (aa != '*') {
2131  idx = aa - 64;
2132  } else {
2133  idx = 27;
2134  }
2135  if ( idx > 0 && idx < ArraySize(s_TrnaList) ) {
2136  return s_TrnaList [idx];
2137  }
2138  return kEmptyStr;
2139 }
2140 
2141 
2142 static int s_ToIupacaa(int aa)
2143 {
2144  vector<char> n(1, static_cast<char>(aa));
2145  vector<char> i;
2147  return i.front();
2148 }
2149 
2150 // ----------------------------------------------------------------------------
2152  const CMappedFeat& feat,
2154  bool pseudo )
2155 // ----------------------------------------------------------------------------
2156 {
2157 
2159  const CRNA_ref& rna = feat.GetData().GetRna();
2160  const CFlatFileConfig& cfg = ctx.Config();
2161  CScope& scope = ctx.GetScope();
2162 
2163  ///
2164  /// always output transcript_id
2165  ///
2166  {{
2167  EFeatureQualifier slot =
2168  (ctx.IsRefSeq() || cfg.IsModeDump() || cfg.IsModeGBench()) ?
2170  try {
2171  if (feat.IsSetProduct()) {
2172  CConstRef<CSeq_id> sip(feat.GetProduct().GetId());
2173  if (sip) {
2174  CBioseq_Handle prod =
2175  scope.GetBioseqHandleFromTSE(*sip, ctx.GetHandle());
2176  if ( prod ) {
2177  x_AddProductIdQuals(prod, slot);
2178  } else {
2179  string acc;
2180  sip->GetLabel(&acc, CSeq_id::eBoth);
2183  if (besth) {
2184  acc.clear();
2185  besth.GetSeqId()->GetLabel(&acc, CSeq_id::eContent);
2186  }
2187  if( acc.empty() && ! cfg.DropIllegalQuals() ) {
2188  //sure of that? doesn't look right---
2189  x_AddQual(slot, new CFlatStringQVal(
2190  NStr::NumericToString(sip->GetGi()) ) );
2191  }
2192  if (!acc.empty()) {
2193  if ( !cfg.DropIllegalQuals() || IsValidAccession(acc)) {
2194  CRef<CSeq_id> acc_id(new CSeq_id(acc));
2195  x_AddQual(slot, new CFlatSeqIdQVal(*acc_id));
2196  }
2197  /*
2198  if (! (cfg.HideGI() || cfg.IsPolicyFtp() || cfg.IsPolicyGenomes())) {
2199  x_AddQual(eFQ_db_xref, new CFlatSeqIdQVal(*sip, true));
2200  }
2201  */
2202  }
2203  }
2204  }
2205  }
2206  }
2207  catch (CObjmgrUtilException&) {
2208  }
2209  }}
2210 
2211  CRNA_ref::TType rna_type = rna.IsSetType() ?
2212  rna.GetType() : CRNA_ref::eType_unknown;
2213  switch ( rna_type ) {
2214  case CRNA_ref::eType_tRNA:
2215  {
2216  if ( !pseudo && ( cfg.ShowTranscript() || cfg.IsFormatGBSeq() || cfg.IsFormatINSDSeq() ) ) {
2217  CSeqVector vec(feat.GetLocation(), scope);
2219  string transcription;
2220  vec.GetSeqData(0, vec.size(), transcription);
2221  x_AddQual(eFQ_transcription, new CFlatStringQVal(transcription));
2222  }
2223  if (rna.IsSetExt()) {
2224  const CRNA_ref::C_Ext& ext = rna.GetExt();
2225  switch (ext.Which()) {
2227  {
2228  // amino acid could not be parsed into structured form
2229  if (!cfg.DropIllegalQuals()) {
2231  new CFlatStringQVal(ext.GetName()));
2232  } else {
2234  new CFlatStringQVal("tRNA-OTHER"));
2235  }
2236  break;
2237  }
2239  {
2240  const CTrna_ext& trna = ext.GetTRNA();
2241  int aa = 0;
2242  if ( trna.IsSetAa() && trna.GetAa().IsNcbieaa() ) {
2243  aa = trna.GetAa().GetNcbieaa();
2244  }
2245  if ( cfg.IupacaaOnly() ) {
2246  aa = s_ToIupacaa(aa);
2247  }
2248  const string& aa_str = s_AaName(aa);
2249  string amino_acid_str = aa_str;
2250 
2251  if ( !aa_str.empty() ) {
2252  const string& ac_str = aa_str;
2253  if (NStr::CompareNocase (ac_str, "tRNA-Met") == 0) {
2254  for (auto& gbqual : m_Feat.GetQual()) {
2255  if (!gbqual->IsSetQual() || !gbqual->IsSetVal()) continue;
2256  if (NStr::CompareNocase( gbqual->GetQual(), "product") != 0) continue;
2257  if (NStr::CompareNocase (gbqual->GetVal (), "tRNA-fMet") == 0) {
2258  amino_acid_str = "tRNA-fMet";
2259  }
2260  if (NStr::CompareNocase (gbqual->GetVal (), "tRNA-iMet") == 0) {
2261  amino_acid_str = "tRNA-iMet";
2262  }
2263  }
2264  } else if (NStr::CompareNocase (ac_str, "tRNA-Ile") == 0) {
2265  for (auto& gbqual : m_Feat.GetQual()) {
2266  if (!gbqual->IsSetQual() || !gbqual->IsSetVal()) continue;
2267  if (NStr::CompareNocase( gbqual->GetQual(), "product") != 0) continue;
2268  if (NStr::CompareNocase (gbqual->GetVal (), "tRNA-Ile2") == 0) {
2269  amino_acid_str = "tRNA-Ile2";
2270  }
2271  }
2272  }
2273  x_AddQual(eFQ_product, new CFlatStringQVal(amino_acid_str));
2274  if ( trna.IsSetAnticodon() && !ac_str.empty() ) {
2276  new CFlatAnticodonQVal(trna.GetAnticodon(),
2277  ac_str.substr(5, NPOS)));
2278  }
2279  }
2280  if ( trna.IsSetCodon() ) {
2281  const string& comment =
2283  x_AddQual(eFQ_trna_codons, new CFlatTrnaCodonsQVal(trna, comment));
2284  }
2285  //x_AddQual(eFQ_exception_note, new CFlatStringQVal("tRNA features were annotated by tRNAscan-SE."));
2286  break;
2287  }
2288  default:
2289  break;
2290  } // end of internal switch
2291  }
2292  break;
2293  }
2294  case CRNA_ref::eType_mRNA:
2295  case CRNA_ref::eType_rRNA:
2296  {
2297  if ( !pseudo && ( cfg.ShowTranscript() || cfg.IsFormatGBSeq() || cfg.IsFormatINSDSeq() ) ) {
2298  CSeqVector vec(feat.GetLocation(), scope);
2300  string transcription;
2301  vec.GetSeqData(0, vec.size(), transcription);
2302  x_AddQual(eFQ_transcription, new CFlatStringQVal(transcription));
2303  }
2304  // intentional fall through
2305  }
2306  default:
2307  switch ( subtype ) {
2308 
2310  if ( ! rna.IsSetExt() ) {
2311  break;
2312  }
2313  const CRNA_ref_Base::TExt& ext = rna.GetExt();
2314  if ( ! ext.IsGen() ) {
2315  break;
2316  }
2317  break;
2318  }
2320  if ( ! rna.IsSetExt() ) {
2321  break;
2322  }
2323  const CRNA_ref_Base::TExt& ext = rna.GetExt();
2324  if ( ext.IsGen() && ext.GetGen().IsSetQuals() ) {
2325 
2326  const list< CRef< CRNA_qual > >& quals = ext.GetGen().GetQuals().Get();
2327  list< CRef< CRNA_qual > >::const_iterator it = quals.begin();
2328  for ( ; it != quals.end(); ++it ) {
2329  if ( (*it)->IsSetQual() && (*it)->IsSetVal() ) {
2330  if ( (*it)->GetQual() == "tag_peptide" ) {
2332  new CFlatStringQVal(
2333  (*it)->GetVal(), CFormatQual::eUnquoted ) );
2334  break;
2335  }
2336  }
2337  }
2338  }
2339  break;
2340  }
2343  if ( ! rna.IsSetExt() ) {
2344  break;
2345  }
2346  const CRNA_ref_Base::TExt& ext = rna.GetExt();
2347  if ( ext.IsName() ) {
2348  string strName = ext.GetName();
2349  if ( strName != "misc_RNA" ) {
2350  x_AddQual( eFQ_product, new CFlatStringQVal( strName ) );
2351  }
2352  }
2353  break;
2354  }
2355  default:
2356  if ( rna.IsSetExt() && rna.GetExt().IsName() ) {
2357  x_AddQual( eFQ_product, new CFlatStringQVal( rna.GetExt().GetName() ) );
2358  }
2359  break;
2360  }
2361  } // end of switch
2362 
2363  // some things to extract from RNA-gen
2364  if( rna.IsSetExt() && rna.GetExt().IsGen() ) {
2365  const CRNA_gen &gen = rna.GetExt().GetGen();
2366  if ( gen.IsSetClass() ) {
2367  if (gen.IsLegalClass()) {
2369  new CFlatStringQVal( gen.GetClass() ) );
2370  } else {
2372  new CFlatStringQVal( "other" ));
2374  new CFlatStringQVal( gen.GetClass() ) );
2375  }
2376  }
2377 
2378  if ( gen.IsSetProduct() && ! x_HasQual(eFQ_product) ) {
2380  new CFlatStringQVal( gen.GetProduct() ) );
2381  }
2382  }
2383 }
2384 
2385 // ----------------------------------------------------------------------------
2387  CBioseq_Handle& bsh,
2389  bool pseudo )
2390 // ----------------------------------------------------------------------------
2391 {
2392  const CFlatFileConfig& cfg = ctx.Config();
2393  CScope& scope = ctx.GetScope();
2394 
2395  if ( pseudo || cfg.NeverTranslateCDS() ) {
2396  return;
2397  }
2398 
2399  string translation;
2400  if ( cfg.AlwaysTranslateCDS() || (cfg.TranslateIfNoProduct() && !bsh) ) {
2402  translation, false /* don't include stops */);
2403  }
2404  else if ( bsh ) {
2405  CSeqVector seqv = bsh.GetSeqVector();
2406  /*
2407  CSeq_data::E_Choice coding = cfg.IupacaaOnly() ?
2408  CSeq_data::e_Iupacaa : CSeq_data::e_Ncbieaa;
2409  */
2411  seqv.SetCoding( coding );
2412 
2413  try {
2414  // an exception can occur here if the specified length doesn't match the actual length.
2415  // Although I don't know of any released .asn files with this problem, it can occur
2416  // in submissions.
2417  seqv.GetSeqData( 0, seqv.size(), translation );
2418  } catch( const CException & ) {
2419  // we're unable to do the translation
2420  translation.clear();
2421  }
2422  }
2423 
2424  if (!NStr::IsBlank(translation)) {
2425  x_AddQual(eFQ_translation, new CFlatStringQVal( translation ) );
2426  }
2427 }
2428 
2429 // ----------------------------------------------------------------------------
2431  const CCdregion& cdr,
2432  CBioseqContext& ctx )
2433 // ----------------------------------------------------------------------------
2434 {
2435  if ( ! cdr.IsSetCode() ) {
2436  return;
2437  }
2438  int gcode = cdr.GetCode().GetId();
2439  if ( gcode == 255 ) {
2440  return;
2441  }
2442  if ( ctx.Config().IsFormatGBSeq() || ctx.Config().IsFormatINSDSeq() || gcode > 1 ) {
2444  }
2445 }
2446 
2447 // ----------------------------------------------------------------------------
2449  const CCdregion& cdr,
2450  CBioseqContext& ctx )
2451 // ----------------------------------------------------------------------------
2452 {
2453  CCdregion::TFrame frame = cdr.GetFrame();
2454  if (frame == CCdregion::eFrame_not_set)
2455  frame = CCdregion::eFrame_one;
2456 
2457  // codon_start qualifier is always shown for nucleotides and for proteins mapped
2458  // from cDNA, otherwise only when the frame is not 1.
2459  if ( !ctx.IsProt() || !IsMappedFromCDNA() || frame != CCdregion::eFrame_one ) {
2460  x_AddQual( eFQ_codon_start, new CFlatIntQVal( frame ) );
2461  }
2462 }
2463 
// Variant of the codon_start logic that first shifts the frame by `inset`.
// A frame of eFrame_not_set is treated as eFrame_one; any `inset` other than
// 1 or 2 leaves the frame unchanged.
// NOTE(review): the line naming the function and the `ctx` parameter line are
// missing from this listing; the call x_AddQualCodonStartIdx( cdr, ctx, inset )
// elsewhere in the file shows the expected argument order.
2464 // ----------------------------------------------------------------------------
2466  const CCdregion& cdr,
2468  const int inset )
2469 // ----------------------------------------------------------------------------
2470 {
2471  CCdregion::TFrame frame = cdr.GetFrame();
2472  if (frame == CCdregion::eFrame_not_set) {
2473  frame = CCdregion::eFrame_one;
2474  }
2475 
// inset == 1 maps one->three, two->one, three->two.
2476  if (inset == 1) {
2477  if (frame == CCdregion::eFrame_one) {
2478  frame = CCdregion::eFrame_three;
2479  } else if (frame == CCdregion::eFrame_two) {
2480  frame = CCdregion::eFrame_one;
2481  } else if (frame == CCdregion::eFrame_three) {
2482  frame = CCdregion::eFrame_two;
2483  }
// inset == 2 maps one->two, two->three, three->one.
2484  } else if (inset == 2) {
2485  if (frame == CCdregion::eFrame_one) {
2486  frame = CCdregion::eFrame_two;
2487  } else if (frame == CCdregion::eFrame_two) {
2488  frame = CCdregion::eFrame_three;
2489  } else if (frame == CCdregion::eFrame_three) {
2490  frame = CCdregion::eFrame_one;
2491  }
2492  }
2493 
2494  // codon_start is added unless all three hold: the context is a protein,
2495  // the feature is mapped from cDNA, and the (adjusted) frame is one.
2496  if ( !ctx.IsProt() || !IsMappedFromCDNA() || frame != CCdregion::eFrame_one ) {
2497  x_AddQual( eFQ_codon_start, new CFlatIntQVal( frame ) );
2498  }
2499 }
2500 
// Emits a qualifier built from the Cdregion code-breaks (CFlatCodeBreakQVal),
// but only when the context is not a protein or the feature is not mapped
// from cDNA, and only when code-breaks are set.
// NOTE(review): the line naming the function and the first half of the
// x_AddQual call (the qualifier slot) are missing from this listing;
// presumably x_AddQualTranslationException / eFQ_transl_except - confirm.
2501 // ----------------------------------------------------------------------------
2503  const CCdregion& cdr,
2504  CBioseqContext& ctx )
2505 // ----------------------------------------------------------------------------
2506 {
2507  if ( !ctx.IsProt() || !IsMappedFromCDNA() ) {
2508  if ( cdr.IsSetCode_break() ) {
2510  new CFlatCodeBreakQVal( cdr.GetCode_break() ) );
2511  }
2512  }
2513 }
2514 
// Like the code-break qualifier logic, with a fallback: when no code-breaks
// are set but `tr_ex` is non-empty, a seqfeat note reading
// "unprocessed translation exception: <tr_ex>" is added instead.
// Nothing is added for a protein context whose feature is mapped from cDNA.
// NOTE(review): the line naming the function, the `ctx` parameter line and
// the first half of the code-break x_AddQual call are missing from this
// listing; the call x_AddQualTranslationExceptionIdx( cdr, ctx, tr_ex )
// elsewhere in the file shows the expected argument order.
2515 // ----------------------------------------------------------------------------
2517  const CCdregion& cdr,
2519  string& tr_ex )
2520 // ----------------------------------------------------------------------------
2521 {
2522  if ( !ctx.IsProt() || !IsMappedFromCDNA() ) {
2523  if ( cdr.IsSetCode_break() ) {
2525  new CFlatCodeBreakQVal( cdr.GetCode_break() ) );
2526  } else if ( tr_ex.length() > 0 ) {
2527  x_AddQual(eFQ_seqfeat_note, new CFlatStringQVal("unprocessed translation exception: " + tr_ex));
2528  }
2529  }
2530 }
2531 
// Adds eFQ_prot_conflict with a fixed message when all of the following hold:
//  - the Cdregion conflict flag is set and true;
//  - the context is not a protein, or the feature is not mapped from cDNA;
//  - the feature has a product with a single id whose length in the scope
//    is greater than zero.
// NOTE(review): the line naming the function is missing from this listing;
// the call x_AddQualProteinConflict( cdr, ctx ) elsewhere in the file matches
// this parameter list.
2532 // ----------------------------------------------------------------------------
2534  const CCdregion& cdr,
2535  CBioseqContext& ctx )
2536 // ----------------------------------------------------------------------------
2537 {
2538  static const string conflict_msg =
2539  "Protein sequence is in conflict with the conceptual translation";
2540 
2541  const bool conflict_set = (cdr.IsSetConflict() && cdr.GetConflict());
2542 
2543  if (conflict_set)
2544  {
2545  if (!ctx.IsProt() || !IsMappedFromCDNA()) {
2546  bool has_prot = false;
// GetId() yields a pointer; a null result leaves has_prot false.
2547  if (m_Feat.IsSetProduct() && m_Feat.GetProduct().GetId()) {
2548  has_prot = (sequence::GetLength(m_Feat.GetProduct(), &ctx.GetScope()) > 0);
2549  }
2550  if (has_prot) {
2551  x_AddQual(eFQ_prot_conflict, new CFlatStringQVal(conflict_msg));
2552  }
2553  }
2554  }
2555 }
2556 
// Acts only when the context is a protein.
// NOTE(review): the line naming the function and the statement inside the
// `if` are missing from this listing; the call x_AddQualCodedBy( ctx )
// elsewhere in the file suggests a coded_by qualifier is added - confirm.
2557 // ----------------------------------------------------------------------------
2559  CBioseqContext& ctx )
2560 // ----------------------------------------------------------------------------
2561 {
// The stricter, commented-out condition also required IsMappedFromCDNA().
2562  //if ( ctx.IsProt() && IsMappedFromCDNA() ) {
2563  if ( ctx.IsProt() ) {
2565  }
2566 }
2567 
// Adds eFQ_prot_comment from a comment descriptor found through `protHandle`.
// Does nothing for an invalid handle, when no comment descriptor is found, or
// when the comment is empty. The text is passed through
// TrimSpacesAndJunkFromEnds and RemovePeriodFromEnd before being stored.
// NOTE(review): the line naming the function is missing from this listing
// (presumably CFeatureItem::x_AddQualProtComment - confirm).
2568 // ----------------------------------------------------------------------------
2570  const CBioseq_Handle& protHandle )
2571 // ----------------------------------------------------------------------------
2572 {
2573  if ( ! protHandle ) {
2574  return;
2575  }
// NOTE(review): the third argument (1) presumably limits the descriptor search depth - confirm.
2576  CSeqdesc_CI comm( protHandle, CSeqdesc::e_Comment, 1 );
2577  if ( comm && !comm->GetComment().empty() ) {
2578  string comment = comm->GetComment();
2579 
2580  TrimSpacesAndJunkFromEnds( comment, true );
// The return value of RemovePeriodFromEnd is deliberately ignored (see the disabled code).
2581  /* const bool bAddPeriod = */ RemovePeriodFromEnd( comment, true );
2582  CFlatStringQVal *commentQVal = new CFlatStringQVal( comment );
2583  /* if( bAddPeriod ) {
2584  commentQVal->SetAddPeriod();
2585  } */
2586  x_AddQual( eFQ_prot_comment, commentQVal );
2587  }
2588 }
2589 
// Builds a "Method: <tech>" string from the MolInfo descriptor reachable
// through `protHandle`. Only techniques greater than eTech_standard, other
// than eTech_concept_trans and eTech_concept_trans_a, and with a non-empty
// GetTechString() result are considered.
// NOTE(review): the line naming the function and the first half of the
// x_AddQual call (the qualifier slot) are missing from this listing
// (presumably x_AddQualProtMethod / eFQ_prot_method - confirm).
2590 // ----------------------------------------------------------------------------
2592  const CBioseq_Handle& protHandle )
2593 // ----------------------------------------------------------------------------
2594 {
2595  if ( ! protHandle ) {
2596  return;
2597  }
2598  CSeqdesc_CI mi( protHandle, CSeqdesc::e_Molinfo );
2599  if ( mi ) {
2600  CMolInfo::TTech prot_tech = mi->GetMolinfo().GetTech();
2601  if ( prot_tech > CMolInfo::eTech_standard &&
2602  prot_tech != CMolInfo::eTech_concept_trans &&
2603  prot_tech != CMolInfo::eTech_concept_trans_a ) {
// GetTechString is evaluated twice: once for the emptiness test, once for the value.
2604  if ( !GetTechString( prot_tech ).empty() ) {
2606  "Method: " + GetTechString( prot_tech) ) );
2607  }
2608  }
2609  }
2610 }
2611 
// Index-based lookup of the protein associated with this CDS.
// Outputs (all by reference):
//   protId     - reset to the id of the feature's product location;
//   protHandle - re-resolved from protId unless cfg.AlwaysTranslateCDS();
//   protRef    - the Prot-ref of the best protein feature, or nullptr;
//   protFeat   - the mapped protein feature that protRef points into.
// When the index has no entry for protHandle, the work is delegated to
// x_GetAssociatedProtInfo.
// Note that the early returns for a missing index happen before
// `protRef = nullptr`, so protRef is then left as the caller passed it.
// NOTE(review): this listing is missing the line naming the function, the
// `ctx` parameter line, the declaration of `get_flag` and the declaration of
// `pfx`; the call x_GetAssociatedProtInfoIdx( ctx, prot, protRef, protFeat,
// prot_id ) elsewhere in the file shows the expected argument order.
2612 // ----------------------------------------------------------------------------
2615  CBioseq_Handle& protHandle,
2616  const CProt_ref*& protRef,
2617  CMappedFeat& protFeat,
2618  CConstRef<CSeq_id>& protId )
2619 // ----------------------------------------------------------------------------
2620 {
2621  const CFlatFileConfig& cfg = ctx.Config();
2622  CScope& scope = ctx.GetScope();
2623 
2624  protId.Reset( m_Feat.GetProduct().GetId() );
2625  if ( protId ) {
2626  if ( !cfg.AlwaysTranslateCDS() ) {
// get_flag is widened to eGetBioseq_All for far translations, GED/RefSeq contexts and FTP/Genomes policies.
2628  if ( cfg.ShowFarTranslations() || ctx.IsGED() || ctx.IsRefSeq() || cfg.IsPolicyFtp() || cfg.IsPolicyGenomes() ) {
2629  get_flag = CScope::eGetBioseq_All;
2630  }
2631  protHandle = scope.GetBioseqHandle(*protId, get_flag);
2632  }
2633  }
2634 
// These lookups act purely as guards: `hdl` and this `bsx` are not used below,
// and `idx` is shadowed by the declaration inside the next `if`.
2635  CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
2636  if (! idx) return;
2637  CBioseq_Handle hdl = ctx.GetHandle();
2638  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
2639  if (! bsx) return;
2640 
2641 
2642  protRef = nullptr;
2643  if ( protHandle ) {
2644  CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
2645  if (! idx) return;
2646  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (protHandle);
2647  if (bsx) {
2649  if (pfx) {
2650  protFeat = pfx->GetMappedFeat();
2651  if ( protFeat ) {
2652  protRef = &( protFeat.GetData().GetProt() );
2653  }
2654  }
2655  } else {
// No index entry for the protein: fall back to the non-indexed lookup.
2656  x_GetAssociatedProtInfo(ctx, protHandle, protRef, protFeat, protId);
2657  }
2658  }
2659 }
2660 
// Non-indexed lookup of the protein associated with this CDS.
// Outputs (all by reference):
//   protId     - reset to the id of the feature's product location;
//   protHandle - re-resolved from protId unless cfg.AlwaysTranslateCDS();
//   protRef    - the Prot-ref of the feature returned by
//                s_GetBestProtFeature( protHandle ), or nullptr;
//   protFeat   - that protein feature.
// NOTE(review): this listing is missing the line naming the function, the
// `ctx` parameter line and the declaration of `get_flag`; calls of the form
// x_GetAssociatedProtInfo( ctx, prot, protRef, protFeat, prot_id ) elsewhere
// in the file show the expected argument order.
2661 // ----------------------------------------------------------------------------
2664  CBioseq_Handle& protHandle,
2665  const CProt_ref*& protRef,
2666  CMappedFeat& protFeat,
2667  CConstRef<CSeq_id>& protId )
2668 // ----------------------------------------------------------------------------
2669 {
2670  const CFlatFileConfig& cfg = ctx.Config();
2671  CScope& scope = ctx.GetScope();
2672 
2673  protId.Reset( m_Feat.GetProduct().GetId() );
2674  if ( protId ) {
2675  if ( !cfg.AlwaysTranslateCDS() ) {
// get_flag is widened to eGetBioseq_All for far translations, GED/RefSeq contexts and FTP/Genomes policies.
2677  if ( cfg.ShowFarTranslations() || ctx.IsGED() || ctx.IsRefSeq() || cfg.IsPolicyFtp() || cfg.IsPolicyGenomes() ) {
2678  get_flag = CScope::eGetBioseq_All;
2679  }
2680  protHandle = scope.GetBioseqHandle(*protId, get_flag);
2681  }
2682  }
2683 
2684  protRef = nullptr;
2685  if ( protHandle ) {
2686  protFeat = s_GetBestProtFeature( protHandle );
2687  if ( protFeat ) {
2688  protRef = &( protFeat.GetData().GetProt() );
2689  }
2690  }
2691 }
2692 
// Adds eFQ_prot_note from the protein feature's comment, after passing it
// through TrimSpacesAndJunkFromEnds and RemovePeriodFromEnd.
// Does nothing when `protRef` is null or the protein feature has no comment.
// NOTE(review): the line naming the function and the second operand of the
// `||` condition on the processed state are missing from this listing; the
// call x_AddQualProtNote( protRef, protFeat ) elsewhere in the file matches
// this parameter list.
2693 // ----------------------------------------------------------------------------
2695  const CProt_ref* protRef,
2696  const CMappedFeat& protFeat )
2697 // ----------------------------------------------------------------------------
2698 {
2699  if ( ! protRef ) {
2700  return;
2701  }
2702  if ( protFeat.IsSetComment() ) {
2703  if ( protRef->GetProcessed() == CProt_ref::eProcessed_not_set ||
2705  string prot_note = protFeat.GetComment();
2706  TrimSpacesAndJunkFromEnds( prot_note, true );
2707  RemovePeriodFromEnd( prot_note, true );
2708  x_AddQual( eFQ_prot_note, new CFlatStringQVal( prot_note ) );
2709  }
2710  }
2711 }
2712 
2713 
// Adds eFQ_protein_id and, for GIs, eFQ_db_xref qualifiers for the product
// protein.
// Two paths:
//  - `protHandle` is valid: the ids on the complete Bioseq are scanned. A
//    general id is used only when every id on the Bioseq is general and
//    exactly one of them is "usable" (non-empty db and tag, db not in the
//    ignored list).
//  - otherwise, when `protId` is set: an accession and/or GI is derived from
//    protId alone, consulting the scope for the missing half.
// NOTE(review): this listing is missing the line naming the function, the
// `ctx` parameter line and the first line of case labels in the switch; the
// call x_AddQualProteinId( ctx, prot, prot_id ) elsewhere in the file shows
// the expected argument order.
2714 // ----------------------------------------------------------------------------
2717  const CBioseq_Handle& protHandle,
2718  CConstRef<CSeq_id> protId )
2719 // ----------------------------------------------------------------------------
2720 {
2721  if ( protHandle ) {
2722  CConstRef<CBioseq> pBioseq( protHandle.GetCompleteBioseq() );
2723 
2724  // extract the *one* usable general seq-id (if there is one)
2725  // (the loop sets pTheOneUsableGeneralSeqId, or leaves it NULL
2726  // if there is zero or more than one usable general seqids)
2727  CConstRef<CSeq_id> pTheOneUsableGeneralSeqId;
2728  FOR_EACH_SEQID_ON_BIOSEQ(seqid_ci, *pBioseq) {
2729  const CSeq_id & seqid = **seqid_ci;
2730  if( ! seqid.IsGeneral() ) {
2731  // not just general, so ignore all of them
2732  pTheOneUsableGeneralSeqId.Reset();
2733  break;
2734  }
2735 
2736  const CDbtag & db_tag = seqid.GetGeneral();
2737 
2738  // db types to ignore
2739  static const char* const sc_IgnoredDbs[] = {
2740  "BankIt",
2741  "NCBIFILE",
2742  "PID",
2743  "SMART",
2744  "TMSMART",
2745  };
// The set uses PNocase, so the db lookup below is case-insensitive.
2746  typedef CStaticArraySet<const char*, PNocase> TIgnoredDbSet;
2747  DEFINE_STATIC_ARRAY_MAP(TIgnoredDbSet, sc_IgnoredDbSet, sc_IgnoredDbs );
2748 
2749  // get db and tag
2750  const string & sDb = GET_STRING_FLD_OR_BLANK(db_tag, Db);
2751  string sTag;
2752  if( FIELD_IS_SET(db_tag, Tag) ) {
2753  stringstream sTagStrm;
2754  db_tag.GetTag().AsString(sTagStrm);
2755  // swap faster than assignment
2756  sTagStrm.str().swap(sTag);
2757  }
2758 
2759  if( ! sDb.empty() && ! sTag.empty() &&
2760  sc_IgnoredDbSet.find(sDb.c_str()) == sc_IgnoredDbSet.end() )
2761  {
2762  if( pTheOneUsableGeneralSeqId ) {
2763  // more than one, so ignore all of them
2764  pTheOneUsableGeneralSeqId.Reset();
2765  break;
2766  } else {
2767  pTheOneUsableGeneralSeqId = *seqid_ci;
2768  }
2769  }
2770  }
2771 
// Second pass: emit qualifiers. eLastRegularChoice records whether a
// "regular" id has already supplied a protein_id before a GI is reached.
2772  CSeq_id::E_Choice eLastRegularChoice = CSeq_id::e_not_set;
2773  FOR_EACH_SEQID_ON_BIOSEQ(seqid_ci, *pBioseq) {
2774  const CSeq_id & seqid = **seqid_ci;
2775 
2776  switch( seqid.Which() ) {
2778  case CSeq_id::e_Other:
2779  case CSeq_id::e_Tpg: case CSeq_id::e_Tpe: case CSeq_id::e_Tpd:
2780  case CSeq_id::e_Gpipe:
2781  x_AddQual( eFQ_protein_id, new CFlatSeqIdQVal( seqid ) );
2782  eLastRegularChoice = seqid.Which();
2783  break;
2784 
2785  case CSeq_id::e_Gi:
2786  if( seqid.GetGi() > ZERO_GI ) {
2787  const CFlatFileConfig& cfg = GetContext()->Config();
// GIs are suppressed entirely under HideGI and the FTP/Genomes policies.
2788  if (! (cfg.HideGI() || cfg.IsPolicyFtp() || cfg.IsPolicyGenomes())) {
2789  if ( eLastRegularChoice == CSeq_id::e_not_set ) {
2790  // use as protein_id if it's the first usable one
2791  x_AddQual( eFQ_protein_id, new CFlatSeqIdQVal( seqid ) );
2792  }
2793  x_AddQual( eFQ_db_xref, new CFlatSeqIdQVal( seqid, true ) );
2794  }
2795  }
2796  break;
2797 
2798  case CSeq_id::e_General:
2799  // show it if it's the *one* usable general seqid. otherwise, ignore
2800  if( *seqid_ci == pTheOneUsableGeneralSeqId ) {
2801  x_AddQual( eFQ_protein_id, new CFlatSeqIdQVal( seqid ) );
2802  }
2803  break;
2804 
2805  default:
2806  // ignore other types
2807  break;
2808  }
2809  }
2810  } else if( protId ) {
2811 
2812  TGi gi = ZERO_GI;
2813  string prot_acc;
2814 
2815  // get gi and prot_acc
2816  if ( protId->IsGi() ) {
2817  gi = protId->GetGi();
2818  if( gi > ZERO_GI ) {
// Best effort: a failed lookup leaves prot_acc empty.
2819  try {
2820  prot_acc = GetAccessionForGi( gi, ctx.GetScope() );
2821  } catch ( CException& ) {}
2822  }
2823  } else {
2824 
2825  // swap is faster than assignment
2826  // protId->GetSeqIdString(true).swap( prot_acc );
2827  prot_acc = protId->GetSeqIdString(true);
2828 
2829  // find prot_acc and gi
2830  //const CTextseq_id* pTextSeq_id = protId->GetTextseq_Id();
2831  //if( pTextSeq_id ) {
2832  // stringstream protAccStrm;
2833  // pTextSeq_id->AsFastaString(protAccStrm);
2834  // // swap is faster than assignment
2835  // protAccStrm.str().swap( prot_acc );
2836 
2837  //}
2838  try {
2839  gi = ctx.GetScope().GetGi( CSeq_id_Handle::GetHandle(*protId) );
2840  } catch(CException &) {
2841  // could not get gi
2842  }
2843  }
2844 
2845  if( ! prot_acc.empty() ) {
2846  if ( ! ctx.Config().DropIllegalQuals() || IsValidAccession( prot_acc ) ) {
// If the accession cannot be parsed into a CSeq_id, it is emitted as a plain string.
2847  try {
2848  CRef<CSeq_id> acc_id( new CSeq_id( prot_acc ) );
2849  x_AddQual( eFQ_protein_id, new CFlatSeqIdQVal( *acc_id ) );
2850  } catch( CException & ) {
2851  x_AddQual( eFQ_protein_id, new CFlatStringQVal(prot_acc) );
2852  }
2853  }
2854  }
2855 
2856  if( gi > ZERO_GI ) {
// Reuse protId when it already is the GI; otherwise build a GI id for the xref.
2857  CConstRef<CSeq_id> pGiSeqId(
2858  protId->IsGi() ?
2859  protId.GetPointer() :
2860  new CSeq_id(CSeq_id::e_Gi, gi) );
2861  x_AddQual( eFQ_db_xref, new CFlatSeqIdQVal( *pGiSeqId, true ) );
2862  }
2863  }
2864 }
2865 
// Emits product-name qualifiers from the Prot-ref names. The first name is
// wrapped in a CFlatStringQVal; handling differs between dump mode and the
// other modes, and additional names are treated separately when more than
// one name is present.
// Does nothing when `protRef` is null or has no names.
// NOTE(review): this listing is missing the line naming the function, the
// `ctx` parameter line, the qualifier slots, and the statements handling
// additional names and dump mode; the call x_AddQualCdsProduct( ctx,
// protXRef ) elsewhere in the file shows the expected argument order.
2866 // ----------------------------------------------------------------------------
2869  const CProt_ref* protRef )
2870 // ----------------------------------------------------------------------------
2871 {
2872  if ( !protRef ) {
2873  return;
2874  }
2875 
2876  const CFlatFileConfig& cfg = ctx.Config();
2877  const CProt_ref::TName& names = protRef->GetName();
2878  if ( !names.empty() ) {
2879  if ( ! cfg.IsModeDump() ) {
2881  new CFlatStringQVal( names.front() ) );
2882  if ( names.size() > 1 ) {
2885  }
2886 
2887  } else {
2890  }
2891  }
2892  }
2893 }
2894 
// Adds eFQ_prot_desc from the Prot-ref description.
// The text is passed through TrimSpacesAndJunkFromEnds; when
// RemovePeriodFromEnd reports true, SetAddPeriod() is called on the value.
// Does nothing when `protRef` is null or has no description.
// NOTE(review): the line naming the function is missing from this listing;
// the call x_AddQualProtDesc( protXRef ) elsewhere in the file matches this
// parameter list.
2895 // ----------------------------------------------------------------------------
2897  const CProt_ref* protRef )
2898 // ----------------------------------------------------------------------------
2899 {
2900  if ( !protRef || !protRef->IsSetDesc() ) {
2901  return;
2902  }
2903 
2904  string desc = protRef->GetDesc();
2905  TrimSpacesAndJunkFromEnds( desc, true );
2906  bool add_period = RemovePeriodFromEnd( desc, true );
2907  CRef<CFlatStringQVal> prot_desc( new CFlatStringQVal( desc ) );
2908  if ( add_period ) {
2909  prot_desc->SetAddPeriod();
2910  }
2911  x_AddQual( eFQ_prot_desc, prot_desc );
2912 }
2913 
2914 // ----------------------------------------------------------------------------
2916  const CProt_ref* protRef )
2917 // ----------------------------------------------------------------------------
2918 {
2919  if ( !protRef || protRef->GetActivity().empty() ) {
2920  return;
2921  }
2922  ITERATE (CProt_ref::TActivity, it, protRef->GetActivity()) {
2924  }
2925 }
2926 
2927 // ----------------------------------------------------------------------------
2930  const CProt_ref* protRef )
2931 // ----------------------------------------------------------------------------
2932 {
2933  if ( !protRef || !protRef->IsSetEc() || protRef->GetEc().empty() ) {
2934  return;
2935  }
2936 
2937  const CFlatFileConfig& cfg = ctx.Config();
2938  ITERATE(CProt_ref::TEc, ec, protRef->GetEc()) {
2939  if ( !cfg.DropIllegalQuals() || s_IsLegalECNumber( *ec ) ) {
2941  }
2942  }
2943 }
2944 
// Index-aware driver that adds the CDS qualifiers for `cds`.
// Steps visible here:
//  1. return unless the seq-entry index and the Bioseq index for the context
//     handle are available;
//  2. force `pseudo` to false for EMBL/DDBJ contexts;
//  3. when the context location is a single interval (not "whole"), compute
//     `inset`, the offset of the interval's start (or its end, for a
//     minus-strand original location) within the original feature location,
//     clamp negatives to 0 and reduce modulo 3;
//  4. take the first "transl_except" GenBank qualifier value as `tr_ex` and
//     erase any eFQ_transl_except entries already collected;
//  5. add codon_start / translation-exception / conflict / coded_by
//     qualifiers, then (unless protein context mapped from cDNA) the protein
//     qualifiers and the product qualifiers, with the Prot xref taking
//     precedence over the looked-up Prot-ref.
// NOTE(review): this listing is missing the line naming the function
// (presumably CFeatureItem::x_AddQualsCdregionIdx), the `ctx` parameter line,
// one call just before x_AddQualCodonStartIdx, the declaration of `prot` and
// two calls after x_GetAssociatedProtInfoIdx - confirm against the
// repository copy.
2945 // ----------------------------------------------------------------------------
2947  const CMappedFeat& cds,
2949  bool pseudo)
2950 // ----------------------------------------------------------------------------
2951 {
2952  CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
2953  if (! idx) return;
2954  CBioseq_Handle hdl = ctx.GetHandle();
2955  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
2956  if (! bsx) return;
2957 
2958  if ( ctx.IsEMBL() || ctx.IsDDBJ() ) {
2959  pseudo = false;
2960  }
2961 
2962  const CCdregion& cdr = cds.GetData().GetCdregion();
2963 
2964  // const CSeq_loc& cdsloc = cds.GetLocation();
2965  const CSeq_loc& orgloc = cds.GetOriginalFeature().GetLocation();
2966  const CSeq_loc& bsploc = ctx.GetLocation();
2967 
2968  // cerr << "CDS " << MSerial_AsnText << cdsloc;
2969  // cerr << "ORG " << MSerial_AsnText << orgloc;
2970  // cerr << "BSP " << MSerial_AsnText << bsploc;
2971 
2972  int inset = 0;
2973  if ( ! ctx.GetLocation().IsWhole()) {
2974  if (bsploc.IsInt()) {
2975  const CSeq_interval& bspint = bsploc.GetInt();
2976  if ( orgloc.IsSetStrand() && orgloc.GetStrand() == eNa_strand_minus ) {
// This inner `hdl` shadows the one declared at the top of the function.
2977  CBioseq_Handle& hdl = ctx.GetHandle();
2978  if (hdl) {
// Minus strand: measure from the interval's "to" coordinate.
2979  int pos = bspint.GetTo();
2980  // cerr << "PS " << pos << endl;
2981  const CSeq_id* bid = bsploc.GetId();
2982  ENa_strand strand = eNa_strand_minus;
// The CSeq_loc constructor used here takes a non-const CSeq_id&, hence the const_cast.
2983  CSeq_id& cid = const_cast<CSeq_id&>(*bid);
2984  CConstRef<CSeq_loc> newloc(new CSeq_loc(cid, pos, pos, strand));
2985  // cerr << "NEW " << MSerial_AsnText << newloc;
2986  inset = sequence::LocationOffset(orgloc, *newloc, eOffset_FromStart, &ctx.GetScope());
2987  // cerr << "IS " << inset << endl;
2988  }
2989  } else {
// Plus (or unset) strand: measure from the interval's "from" coordinate.
2990  int pos = bspint.GetFrom();
2991  // cerr << "PS " << pos << endl;
2992  const CSeq_id* bid = bsploc.GetId();
2993  ENa_strand strand = eNa_strand_plus;
2994  CSeq_id& cid = const_cast<CSeq_id&>(*bid);
2995  CConstRef<CSeq_loc> newloc(new CSeq_loc(cid, pos, pos, strand));
2996  // cerr << "NEW " << MSerial_AsnText << newloc;
2997  inset = sequence::LocationOffset(orgloc, *newloc, eOffset_FromStart, &ctx.GetScope());
2998  // cerr << "IS " << inset << endl;
2999  }
3000  }
3001  }
// Negative offsets are treated as no inset; only the remainder mod 3 matters for the frame.
3002  if (inset < 0) {
3003  inset = 0;
3004  }
3005  inset = (inset % 3);
3006 
3007  const CProt_ref* protRef = nullptr;
3008  CMappedFeat protFeat;
3009  CConstRef<CSeq_id> prot_id;
3010 
// Only the first transl_except GenBank qualifier with a value is kept.
3011  string tr_ex;
3012  for (auto& gbqual : cds.GetQual()) {
3013  if (!gbqual->IsSetQual() || !gbqual->IsSetVal()) continue;
3014  if (NStr::CompareNocase( gbqual->GetQual(), "transl_except") != 0) continue;
3015  tr_ex = gbqual->GetVal ();
3016  break;
3017  }
// Drop previously collected eFQ_transl_except entries before they are regenerated below.
3018  TQI it = m_Quals.begin();
3019  while ( it != m_Quals.end() ) {
3020  if ( it->first == eFQ_transl_except ) {
3021  it = m_Quals.Erase(it);
3022  } else {
3023  ++it;
3024  }
3025  }
3026 
3028  x_AddQualCodonStartIdx( cdr, ctx, inset );
3029  x_AddQualTranslationExceptionIdx( cdr, ctx, tr_ex );
3030  x_AddQualProteinConflict( cdr, ctx );
3031  x_AddQualCodedBy( ctx );
3032  if ( ctx.IsProt() && IsMappedFromCDNA() ) {
3033  return;
3034  }
3035 
3036  // protein qualifiers
3037  if (m_Feat.IsSetProduct()) {
3039  ctx.GetScope().GetBioseqHandle(m_Feat.GetProductId());
3040  x_GetAssociatedProtInfoIdx( ctx, prot, protRef, protFeat, prot_id );
3043  x_AddQualProtNote( protRef, protFeat );
3044  x_AddQualProteinId( ctx, prot, prot_id );
3045  x_AddQualTranslation( prot, ctx, pseudo );
3046  }
3047 
3048  // add qualifiers where associated xref overrides the ref:
3049  const CProt_ref* protXRef = m_Feat.GetProtXref();
3050  if ( ! protXRef ) {
3051  protXRef = protRef;
3052  }
3053  x_AddQualCdsProduct( ctx, protXRef );
3054  x_AddQualProtDesc( protXRef );
3055  x_AddQualProtActivity( protXRef );
3056  x_AddQualProtEcNumber( ctx, protXRef );
3057 }
3058 
// Non-indexed driver that adds the CDS qualifiers for `cds`:
// codon_start, conflict and coded_by first; then, unless the context is a
// protein mapped from cDNA, the protein qualifiers (note, protein_id,
// translation) and the product qualifiers, with the Prot xref taking
// precedence over the looked-up Prot-ref.
// NOTE(review): this listing is missing the line naming the function
// (presumably CFeatureItem::x_AddQualsCdregion), the `ctx` parameter line,
// one call on each side of x_AddQualCodonStart, the declaration of `prot`
// and two calls after x_GetAssociatedProtInfo - confirm against the
// repository copy.
3059 // ----------------------------------------------------------------------------
3061  const CMappedFeat& cds,
3063  bool pseudo)
3064 // ----------------------------------------------------------------------------
3065 {
3066  const CCdregion& cdr = cds.GetData().GetCdregion();
3067 
3068  const CProt_ref* protRef = nullptr;
3069  CMappedFeat protFeat;
3070  CConstRef<CSeq_id> prot_id;
3071 
3073  x_AddQualCodonStart( cdr, ctx );
3075  x_AddQualProteinConflict( cdr, ctx );
3076  x_AddQualCodedBy( ctx );
3077  if ( ctx.IsProt() && IsMappedFromCDNA() ) {
3078  return;
3079  }
3080 
3081  // protein qualifiers
3082  if (m_Feat.IsSetProduct()) {
3084  ctx.GetScope().GetBioseqHandle(m_Feat.GetProductId());
3085  x_GetAssociatedProtInfo( ctx, prot, protRef, protFeat, prot_id );
3088  x_AddQualProtNote( protRef, protFeat );
3089  x_AddQualProteinId( ctx, prot, prot_id );
3090  x_AddQualTranslation( prot, ctx, pseudo );
3091  }
3092 
3093  // add qualifiers where associated xref overrides the ref:
3094  const CProt_ref* protXRef = m_Feat.GetProtXref();
3095  if ( ! protXRef ) {
3096  protXRef = protRef;
3097  }
3098  x_AddQualCdsProduct( ctx, protXRef );
3099  x_AddQualProtDesc( protXRef );
3100  x_AddQualProtActivity( protXRef );
3101  x_AddQualProtEcNumber( ctx, protXRef );
3102 }
3103 
3104 static int s_ScoreSeqIdHandle(const CSeq_id_Handle& idh)
3105 {
3106  CConstRef<CSeq_id> id = idh.GetSeqId();
3107  CRef<CSeq_id> id_non_const
3108  (const_cast<CSeq_id*>(id.GetPointer()));
3109  return CSeq_id::Score(id_non_const);
3110 }
3111 
3112 
// Feeds every id of an approved type to `tracker` and returns
// tracker.GetBestChoice(). Ids of any other type are never considered.
// NOTE(review): the signature line and the declaration of `tracker` are
// missing from this listing; the call s_FindBestIdChoice(ids) elsewhere in
// the file assigns the result to a CSeq_id_Handle.
3114 {
3115  //
3116  // Objective:
3117  // Find the best choice among a given subset of id types. I.e. if a certain
3118  // id scores well but is not of a type we approve of, we still reject it.
3119  //
3122 
3123  ITERATE( CBioseq_Handle::TId, it, ids ) {
3124  switch( (*it).Which() ) {
3125  case CSeq_id::e_Genbank:
3126  case CSeq_id::e_Embl:
3127  case CSeq_id::e_Ddbj:
3128  case CSeq_id::e_Gi:
3129  case CSeq_id::e_Other:
3130  case CSeq_id::e_General:
3131  case CSeq_id::e_Tpg:
3132  case CSeq_id::e_Tpe:
3133  case CSeq_id::e_Tpd:
3134  case CSeq_id::e_Gpipe:
3135  tracker(*it);
3136  break;
3137  default:
3138  break;
3139  }
3140  }
3141  return tracker.GetBestChoice();
3142 }
3143 
// Adds a single id qualifier in `slot` for the product Bioseq, using the id
// picked by s_FindBestIdChoice. In addition, when the feature is a Cdregion
// or the context is not a protein, a "GI:<number>" string value is built for
// each GI on the product unless GIs are hidden (HideGI or the FTP/Genomes
// policies).
// Does nothing for an invalid handle, an empty id list, or when no id of an
// approved type exists.
// NOTE(review): the line naming the function (presumably
// CFeatureItem::x_AddProductIdQuals) and the first half of the x_AddQual
// call for the GI value are missing from this listing - confirm.
3144 // ---------------------------------------------------------------------------
3146  CBioseq_Handle& prod,
3147  EFeatureQualifier slot)
3148 // ---------------------------------------------------------------------------
3149 {
3150  //
3151  // Objective (according to the C toolkit):
3152  // We need one (and only one) /xxx_id tag. If there are multiple ids,
3153  // only the one chosen by s_FindBestIdChoice is used.
3154 
3155  if (!prod) {
3156  return;
3157  }
3158  const CBioseq_Handle::TId& ids = prod.GetId();
3159  if (ids.empty()) {
3160  return;
3161  }
3162 
3163  CSeq_id_Handle best = s_FindBestIdChoice(ids);
3164  if (!best) {
3165  return;
3166  }
3167  x_AddQual(slot, new CFlatSeqIdQVal(*best.GetSeqId()));
3168 
3169  if( m_Feat.GetData().IsCdregion() || ! GetContext()->IsProt() ) {
3170  const CFlatFileConfig& cfg = GetContext()->Config();
3171  ITERATE( CBioseq_Handle::TId, id_iter, ids ) {
3172  if( id_iter->IsGi() ) {
3173  if (! (cfg.HideGI() || cfg.IsPolicyFtp() || cfg.IsPolicyGenomes())) {
3175  new CFlatStringQVal("GI:" + NStr::NumericToString(id_iter->GetGi()) ));
3176  }
3177  }
3178  }
3179  }
3180 }
3181 
// Adds qualifiers for a Region feature.
// An empty region string yields nothing. Outside a protein context (or for a
// non-"region" subtype) the value "Region: <name>" goes into eFQ_region.
// Afterwards the feature's Ext / Exts user objects are scanned for the first
// "cddScoreData" object that has a "definition" field; that definition (with
// its final period removed) is emitted only when it differs from the region
// name under a case-insensitive, final-period-insensitive comparison.
// NOTE(review): this listing is missing the line naming the function
// (presumably CFeatureItem::x_AddQualsRegion), the statement in the
// protein/region branch, and the first half of the x_AddQual call for the
// definition string - confirm against the repository copy.
3182 // ----------------------------------------------------------------------------
3184  CBioseqContext& ctx )
3185 // ----------------------------------------------------------------------------
3186 {
3187  _ASSERT( m_Feat.GetData().IsRegion() );
3188 
3189  //cerr << MSerial_AsnText << m_Feat.GetOriginalFeature();
3190 
3191  const CSeqFeatData& data = m_Feat.GetData();
3192  const string &region = data.GetRegion();
3193  if ( region.empty() ) {
3194  return;
3195  }
3196 
3197  if ( ctx.IsProt() &&
3198  data.GetSubtype() == CSeqFeatData::eSubtype_region )
3199  {
3201  } else {
3202  x_AddQual(eFQ_region, new CFlatStringQVal("Region: " + region));
3203  }
3204 
3205  /// parse CDD data from the user object
3206  list< CConstRef<CUser_object> > objs;
3207  if (m_Feat.IsSetExt()) {
3208  objs.push_back(CConstRef<CUser_object>(&m_Feat.GetExt()));
3209  }
3210  if (m_Feat.IsSetExts()) {
3211  copy(m_Feat.GetExts().begin(), m_Feat.GetExts().end(),
3212  back_inserter(objs));
3213  }
3214 
3215  ITERATE (list< CConstRef<CUser_object> >, it, objs) {
3216  const CUser_object& obj = **it;
3217  bool found = false;
3218  if (obj.IsSetType() &&
3219  obj.GetType().IsStr() &&
3220  obj.GetType().GetStr() == "cddScoreData") {
3221  CConstRef<CUser_field> f = obj.GetFieldRef("definition");
3222  if (f) {
// NOTE(review): GetStr() is called without an IsStr() check on the field data - confirm the field is always a string.
3223  CUser_field_Base::C_Data::TStr definition_str = f->GetData().GetStr();
3224  RemovePeriodFromEnd(definition_str, true);
3225  if( ! s_StrEqualDisregardFinalPeriod(definition_str, region, NStr::eNocase) ) {
3227  new CFlatStringQVal(definition_str));
3228  found = true;
3229  }
// The scan stops at the first cddScoreData object with a definition, whether or not it was emitted.
3230  break;
3231 
3232  /**
3233  if (ctx.IsProt()) {
3234  if (f->GetData().GetStr() != region || added_raw) {
3235  x_AddQual(eFQ_region,
3236  new CFlatStringQVal(f->GetData().GetStr()));
3237  }
3238  } else {
3239  x_AddQual(eFQ_region,
3240  new CFlatStringQVal(f->GetData().GetStr()));
3241  }
3242 
3243  found = true;
3244  break;
3245  **/
3246 
3247  /**
3248  if (ctx.IsProt() && region == f->GetData().GetStr()) {
3249  /// skip
3250  } else {
3251  x_AddQual(eFQ_region,
3252  new CFlatStringQVal(f->GetData().GetStr()));
3253  found = true;
3254  break;
3255  }
3256  **/
3257  }
3258  }
3259 
// With the unconditional break above, `found` is only ever set on a path that has already left the loop.
3260  if (found) {
3261  break;
3262  }
3263  }
3264 }
3265 
3266 
// Adds the bond qualifier for a Bond feature.
// The bond name comes from s_GetBondName; a blank name yields nothing.
// For a protein context in GenBank, GBSeq or INSDSeq format the name goes
// into eFQ_bond_type as a plain string; otherwise it goes into eFQ_bond as a
// CFlatBondQVal.
// NOTE(review): the line naming the function is missing from this listing
// (presumably CFeatureItem::x_AddQualsBond - confirm).
3267 // ----------------------------------------------------------------------------
3269  CBioseqContext& ctx )
3270 // ----------------------------------------------------------------------------
3271 {
3272  _ASSERT( m_Feat.GetData().IsBond() );
3273 
3274  const CSeqFeatData& data = m_Feat.GetData();
3275  const string& bond = s_GetBondName( data.GetBond() );
3276  if ( NStr::IsBlank( bond ) ) {
3277  return;
3278  }
3279 
3280  if ( ( ctx.IsGenbankFormat() || ctx.Config().IsFormatGBSeq() || ctx.Config().IsFormatINSDSeq() ) && ctx.IsProt() ) {
3281  x_AddQual( eFQ_bond_type, new CFlatStringQVal( bond ) );
3282  } else {
3283  x_AddQual( eFQ_bond, new CFlatBondQVal( bond ) );
3284  }
3285 }
3286 
// Adds eFQ_sec_str_type for a protein secondary-structure feature, using the
// enum name of the Psec_str value as the qualifier text.
// `ctx` is not used in the visible body.
// NOTE(review): the line naming the function (presumably
// CFeatureItem::x_AddQualsPsecStr) and the first statement of the body
// (presumably an _ASSERT on the data type) are missing from this listing.
3287 // ----------------------------------------------------------------------------
3289  CBioseqContext& ctx )
3290 // ----------------------------------------------------------------------------
3291 {
3293 
3294  const CSeqFeatData& data = m_Feat.GetData();
3295 
3296  CSeqFeatData_Base::TPsec_str sec_str_type = data.GetPsec_str();
3297 
3298  string sec_str_as_str = CSeqFeatData_Base::ENUM_METHOD_NAME(EPsec_str)()->FindName(sec_str_type, true);
3299  x_AddQual( eFQ_sec_str_type, new CFlatStringQVal( sec_str_as_str ) );
3300 }
3301 
// Adds eFQ_non_std_residue holding the feature's non-standard-residue value.
// `ctx` is not used in the visible body.
// NOTE(review): the line naming the function (presumably
// CFeatureItem::x_AddQualsNonStd) and the first statement of the body
// (presumably an _ASSERT on the data type) are missing from this listing.
3302 // ----------------------------------------------------------------------------
3304  CBioseqContext& ctx )
3305 // ----------------------------------------------------------------------------
3306 {
3308 
3309  const CSeqFeatData& data = m_Feat.GetData();
3310 
3311  CSeqFeatData_Base::TNon_std_residue n_s_res = data.GetNon_std_residue();
3312 
3313  x_AddQual( eFQ_non_std_residue, new CFlatStringQVal( n_s_res ) );
3314 }
3315 
// Adds eFQ_heterogen holding the string of the feature's Het value.
// `ctx` is not used in the body.
// NOTE(review): the line naming the function is missing from this listing
// (presumably CFeatureItem::x_AddQualsHet - confirm).
3316 // ----------------------------------------------------------------------------
3318  CBioseqContext& ctx )
3319 // ----------------------------------------------------------------------------
3320 {
3321  _ASSERT( m_Feat.GetData().IsHet() );
3322 
3323  const CSeqFeatData& data = m_Feat.GetData();
3324 
3325  CSeqFeatData_Base::THet het = data.GetHet();
3326 
3327  x_AddQual( eFQ_heterogen, new CFlatStringQVal( het.Get() ) );
3328 }
3329 
// Adds qualifiers for a Variation feature.
//  - eFQ_db_xref: only when the variation id has db "dbSNP" and a string tag
//    starting with "rs"; the value is "dbSNP:" followed by the tag without
//    its "rs" prefix.
//  - eFQ_replace: one per delta item carrying literal sequence data; the data
//    is converted, truncated to the literal's declared length, lower-cased,
//    and skipped when blank.
// `ctx` is not used in the visible body.
// NOTE(review): this listing is missing the line naming the function
// (presumably CFeatureItem::x_AddQualsVariation), the first statement of the
// body, and the last argument of CSeqportUtil::Convert (the target coding;
// the result is read back via GetIupacna()) - confirm.
3330 // ----------------------------------------------------------------------------
3332  CBioseqContext& ctx )
3333 // ----------------------------------------------------------------------------
3334 {
3336 
3337  const CSeqFeatData& data = m_Feat.GetData();
3338  const CSeqFeatData_Base::TVariation& variation = data.GetVariation();
3339 
3340  // Make the /db_xref qual
3341  if( variation.CanGetId() ) {
3342  const CVariation_ref_Base::TId& dbt = variation.GetId();
3343  // the id tag is quite specific (e.g. db must be "dbSNP", etc.) or it won't print
3344  if ( dbt.IsSetDb() && !dbt.GetDb().empty() &&
3345  dbt.IsSetTag() && dbt.GetTag().IsStr() ) {
3346  const string &oid_str = dbt.GetTag().GetStr();
3347  if( dbt.GetDb() == "dbSNP" && NStr::StartsWith(oid_str, "rs" ) ) {
// substr( 2 ) drops the leading "rs".
3348  x_AddQual(eFQ_db_xref, new CFlatStringQVal( dbt.GetDb() + ":" + oid_str.substr( 2 ) ) );
3349  }
3350  }
3351  }
3352 
3353  // Make the /replace quals:
3354  if( variation.CanGetData() && variation.GetData().IsInstance() &&
3355  variation.GetData().GetInstance().CanGetDelta() ) {
3356  const CVariation_inst_Base::TDelta& delta = variation.GetData().GetInstance().GetDelta();
3357  ITERATE( CVariation_inst_Base::TDelta, delta_iter, delta ) {
3358  if( *delta_iter && (*delta_iter)->CanGetSeq() ) {
3359  const CDelta_item_Base::TSeq& seq = (*delta_iter)->GetSeq();
3360  if( seq.IsLiteral() && seq.GetLiteral().CanGetSeq_data() ) {
3361  const CDelta_item_Base::C_Seq::TLiteral& seq_literal = seq.GetLiteral();
3362  const CSeq_literal_Base::TSeq_data& seq_data = seq_literal.GetSeq_data();
3363 
3364  // convert the data to the standard a,c,g,t
3365  CSeq_data iupacna_seq_data;
3366  CSeqportUtil::Convert( seq_data,
3367  &iupacna_seq_data,
3369  string nucleotides = iupacna_seq_data.GetIupacna().Get();
3370 
3371  // if the specified length and the length of the data conflict,
3372  // use the smaller
3373  const string::size_type max_len_allowed = seq_literal.GetLength();
3374  if( nucleotides.size() > max_len_allowed ) {
3375  nucleotides.resize( max_len_allowed );
3376  }
3377 
3378  NStr::ToLower( nucleotides );
3379 
3380  if (!NStr::IsBlank(nucleotides)) {
3381  x_AddQual(eFQ_replace, new CFlatStringQVal(nucleotides));
3382  }
3383  }
3384  }
3385  }
3386  }
3387 }
3388 
// Maps a site value to its display name. Three site values get the fixed
// spellings "other", "DNA binding" and "inhibition"; every other value is
// looked up by enum name.
// NOTE(review): the signature line and the three `case` labels are missing
// from this listing, so which enum values map to the fixed strings cannot be
// confirmed here; the call s_GetSiteName( site ) elsewhere in the file binds
// the result to a const string&.
3390 {
3391  static const string kOther = "other";
3392  static const string kDnaBinding = "DNA binding";
3393  static const string kInhibit = "inhibition";
3394 
3395  switch (site) {
3397  return kOther;
3399  return kDnaBinding;
3401  return kInhibit;
3402 
3403  default:
3404  return CSeqFeatData::ENUM_METHOD_NAME(ESite)()->FindName(site, true);
3405  }
3406 }
3407 
// Adds the site qualifier for a Site feature.
// For a protein context in GenBank, GBSeq or INSDSeq format the site name
// goes into eFQ_site_type. Otherwise it goes into eFQ_site, but only when
// the feature has no comment or the comment does not already contain the
// site name.
// NOTE(review): the line naming the function is missing from this listing
// (presumably CFeatureItem::x_AddQualsSite - confirm).
3408 // ----------------------------------------------------------------------------
3410  CBioseqContext& ctx )
3411 // ----------------------------------------------------------------------------
3412 {
3413  _ASSERT( m_Feat.GetData().IsSite() );
3414 
3415  const CSeqFeatData& data = m_Feat.GetData();
3416  CSeqFeatData::TSite site = data.GetSite();
3417  const string& site_name = s_GetSiteName( site );
3418 
3419  // ID-4627 : site_type qualifier is needed for GBSeq/INSDSeq XML too
3420  if ( (ctx.Config().IsFormatGenbank() ||
3421  ctx.Config().IsFormatGBSeq() ||
3422  ctx.Config().IsFormatINSDSeq()) && ctx.IsProt() ) {
3423  x_AddQual(eFQ_site_type, new CFlatSiteQVal( site_name ) );
3424  } else {
3425  if ( !m_Feat.IsSetComment() ||
3426  ( NStr::Find( m_Feat.GetComment(), site_name ) == NPOS ) ) {
3427  x_AddQual( eFQ_site, new CFlatSiteQVal( site_name ) );
3428  }
3429  }
3430 }
3431 
// Handles one labelled user field nested inside a feature extension.
//  - label "ModelEvidence": returns early if the feature already carries an
//    "experiment" GenBank qualifier; the action taken otherwise is on a line
//    missing from this listing.
//  - label "Process", "Component" or "Function": forwards the field to
//    x_AddGoQuals.
// Fields without a string label are ignored.
// NOTE(review): the line naming the function is missing from this listing;
// the call x_AddQualsExt(**o, ext) elsewhere in the file matches this
// two-argument form.
3432 // ----------------------------------------------------------------------------
3434  const CUser_field& field, const CSeq_feat::TExt& ext )
3435 // ----------------------------------------------------------------------------
3436 {
3437  if ( field.IsSetLabel() && field.GetLabel().IsStr() ) {
3438  const string& oid = field.GetLabel().GetStr();
3439  if ( oid == "ModelEvidence" ) {
3440  FOR_EACH_GBQUAL_ON_SEQFEAT (gbq_itr, m_Feat) {
3441  const CGb_qual& gbq = **gbq_itr;
3442  if (gbq.IsSetQual()) {
3443  if (NStr::Equal (gbq.GetQual(), "experiment")) return;
3444  }
3445  }
3447  } else if ( oid == "Process" || oid == "Component" || oid == "Function" ) {
3448  x_AddGoQuals(field);
3449  }
3450  }
3451 }
3452 
3453 // ----------------------------------------------------------------------------
3455  const CSeq_feat::TExt& ext )
3456 // ----------------------------------------------------------------------------
3457 {
3458  ITERATE (CUser_object::TData, it, ext.GetData()) {
3459  const CUser_field& field = **it;
3460  if ( !field.IsSetData() ) {
3461  continue;
3462  }
3463  if ( field.GetData().IsObject() ) {
3464  const CUser_object& obj = field.GetData().GetObject();
3465  x_AddQualsExt(obj);
3466  } else if ( field.GetData().IsObjects() ) {
3468  x_AddQualsExt(**o);
3469  }
3470  } else if ( field.GetData().IsFields() ) {
3472  // x_AddGoQuals(**o);
3473  x_AddQualsExt(**o, ext);
3474  }
3475  }
3476  }
3477  if ( ext.IsSetType() && ext.GetType().IsStr() ) {
3478  const string& oid = ext.GetType().GetStr();
3479  if ( oid == "ModelEvidence" ) {
3480  FOR_EACH_GBQUAL_ON_SEQFEAT (gbq_itr, m_Feat) {
3481  const CGb_qual& gbq = **gbq_itr;
3482  if (gbq.IsSetQual()) {
3483  if (NStr::Equal (gbq.GetQual(), "experiment")) return;
3484  }
3485  }
3487  } else if ( oid == "GeneOntology" ) {
3488  x_AddGoQuals(ext);
3489  }
3490  }
3491 }
3492 
// Adds db_xref qualifiers.
// First part: for a non-CDS feature with a product, in a protein context and
// not mapped from a protein, every GI on the product Bioseq is added as an
// eFQ_db_xref.
// Second part: returns when the feature has no Dbxref; the statement that
// handles the feature's own Dbxref list is on a line missing from this
// listing.
// NOTE(review): the line naming the function (presumably
// CFeatureItem::x_AddQualDbXref) is also missing - confirm against the
// repository copy.
3493 // ----------------------------------------------------------------------------
3495  CBioseqContext& ctx )
3496 // ----------------------------------------------------------------------------
3497 {
3498  if ( m_Feat.IsSetProduct() &&
3499  ( !m_Feat.GetData().IsCdregion() && ctx.IsProt() && ! IsMappedFromProt() ) ) {
3500  CBioseq_Handle prod =
3501  ctx.GetScope().GetBioseqHandle( m_Feat.GetProductId() );
3502  if ( prod ) {
3503  const CBioseq_Handle::TId& ids = prod.GetId();
3504  if ( ! ids.empty() ) {
3505  ITERATE (CBioseq_Handle::TId, it, ids) {
3506  if ( it->Which() != CSeq_id::e_Gi ) {
3507  continue;
3508  }
// Only GI ids reach this point, so !IsGeneral() and IsGi() below always hold.
3509  CConstRef<CSeq_id> id = it->GetSeqId();
3510  if (!id->IsGeneral()) {
3511  x_AddQual(eFQ_db_xref, new CFlatSeqIdQVal(*id, id->IsGi()));
3512  }
3513  }
3514  }
3515  }
3516  }
3517  if ( ! m_Feat.IsSetDbxref() ) {
3518  return ;
3519  }
3521 }
3522 
3523 // ----------------------------------------------------------------------------
3525  const CUser_field& field )
3526 // ----------------------------------------------------------------------------
3527 {
3528  if ( field.IsSetLabel() && field.GetLabel().IsStr() ) {
3529  const string& label = field.GetLabel().GetStr();
3530  EFeatureQualifier slot = eFQ_none;
3531  if ( label == "Process" ) {
3532  slot = eFQ_go_process;
3533  } else if ( label == "Component" ) {
3534  slot = eFQ_go_component;
3535  } else if ( label == "Function" ) {
3536  slot = eFQ_go_function;
3537  }
3538  if ( slot == eFQ_none ) {
3539  return;
3540  }
3541 
3543  if ( (*it)->GetData().IsFields() ) {
3544  CRef<CFlatGoQVal> go_val( new CFlatGoQVal(**it) );
3545 
3546  bool okay_to_add = true;
3547 
3548  // check for dups
3549  CFeatureItem::TQCI iter = x_GetQual(slot);
3550  for ( ; iter != m_Quals.end() && iter->first == slot; ++iter) {
3551  const CFlatGoQVal & qual = dynamic_cast<const CFlatGoQVal &>( *iter->second );
3552  if( qual.Equals(*go_val) )
3553  {
3554  okay_to_add = false;
3555  break;
3556  }
3557  }
3558 
3559  if( okay_to_add ) {
3560  x_AddQual(slot, go_val);
3561  }
3562  }
3563  }
3564  }
3565 }
3566 
3567 // ----------------------------------------------------------------------------
3569  const CUser_object& uo )
3570 // ----------------------------------------------------------------------------
3571 {
3572  ITERATE (CUser_object::TData, uf_it, uo.GetData()) {
3573  const CUser_field& field = **uf_it;
3574  if ( field.IsSetLabel() && field.GetLabel().IsStr() ) {
3575  const string& label = field.GetLabel().GetStr();
3576  EFeatureQualifier slot = eFQ_none;
3577  if ( label == "Process" ) {
3578  slot = eFQ_go_process;
3579  } else if ( label == "Component" ) {
3580  slot = eFQ_go_component;
3581  } else if ( label == "Function" ) {
3582  slot = eFQ_go_function;
3583  }
3584  if ( slot == eFQ_none ) {
3585  continue;
3586  }
3587 
3589  if ( (*it)->GetData().IsFields() ) {
3590  CRef<CFlatGoQVal> go_val( new CFlatGoQVal(**it) );
3591 
3592  bool okay_to_add = true;
3593 
3594  // check for dups
3595  CFeatureItem::TQCI iter = x_GetQual(slot);
3596  for ( ; iter != m_Quals.end() && iter->first == slot; ++iter) {
3597  const CFlatGoQVal & qual = dynamic_cast<const CFlatGoQVal &>( *iter->second );
3598  if( qual.Equals(*go_val) )
3599  {
3600  okay_to_add = false;
3601  break;
3602  }
3603  }
3604 
3605  if( okay_to_add ) {
3606  x_AddQual(slot, go_val);
3607  }
3608  }
3609  }
3610  }
3611  }
3612 }
3613 
3614 // ----------------------------------------------------------------------------
3616  const CBioseqContext& ctx,
3617  const CGene_ref* gene_ref,
3618  CConstRef<CSeq_feat>& gene_feat,
3619  bool from_overlap )
3620 // ----------------------------------------------------------------------------
3621 {
3622  const CSeqFeatData& data = m_Feat.GetData();
3623  CSeqFeatData::ESubtype subtype = data.GetSubtype();
3624 
3625  if ( m_Feat.GetData().Which() == CSeqFeatData::e_Gene ) {
3626  gene_ref = &( m_Feat.GetData().GetGene() );
3627  }
3628  if ( ! gene_ref && gene_feat ) {
3629  gene_ref = & gene_feat->GetData().GetGene();
3630  }
3631 
3632  if ( ! gene_ref || gene_ref->IsSuppressed() ) {
3633  return;
3634  }
3635 
3636  const bool is_gene = (subtype == CSeqFeatData::eSubtype_gene);
3637 
3638  const bool okay_to_propage = (subtype != CSeqFeatData::eSubtype_mobile_element &&
3639  subtype != CSeqFeatData::eSubtype_centromere &&
3640  subtype != CSeqFeatData::eSubtype_telomere);
3641 
3642  const string* locus = (gene_ref->IsSetLocus() && !NStr::IsBlank(gene_ref->GetLocus())) ?
3643  &gene_ref->GetLocus() : nullptr;
3644  const string* desc = (gene_ref->IsSetDesc() && !NStr::IsBlank(gene_ref->GetDesc())) ?
3645  &gene_ref->GetDesc() : nullptr;
3646  const TGeneSyn* syn = (gene_ref->IsSetSyn() && !gene_ref->GetSyn().empty()) ?
3647  &gene_ref->GetSyn() : nullptr;
3648  const string* locus_tag =
3649  (gene_ref->IsSetLocus_tag() && !NStr::IsBlank(gene_ref->GetLocus_tag())) ?
3650  &gene_ref->GetLocus_tag() : nullptr;
3651 
3652  if ( ctx.IsProt() ) {
3653  // skip if GenPept format and not gene or CDS
3654  if (subtype != CSeqFeatData::eSubtype_gene && subtype != CSeqFeatData::eSubtype_cdregion) {
3655  return;
3656  }
3657  }
3658 
3659  // gene:
3660  if ( !from_overlap || okay_to_propage ) {
3661  if (locus) {
3662  m_Gene = *locus;
3663  }
3664  else if (desc && okay_to_propage) {
3665  m_Gene = *desc;
3666  }
3667  else if (syn) {
3668  CGene_ref::TSyn syns = *syn;
3669  m_Gene = syns.front();
3670  }
3671  if( !m_Gene.empty() ) {
3672  // we suppress the /gene qual when there's no locus but there is a locus tag (imitates C toolkit)
3673  if (locus || ! locus_tag) {
3675  }
3676  }
3677  }
3678 
3679  // locus tag:
3680  if ( gene_ref || okay_to_propage ) {
3681  if (locus) {
3682  if (locus_tag) {
3684  }
3685  }
3686  else if (locus_tag) {
3688  }
3689  }
3690 
3691  // gene desc:
3692  if ( gene_ref || okay_to_propage ) {
3693  if (locus) {
3694  if (is_gene && desc) {
3695  string desc_cleaned = *desc;
3696  RemovePeriodFromEnd( desc_cleaned, true );
3697  x_AddQual(eFQ_gene_desc, new CFlatStringQVal(desc_cleaned));
3698  }
3699  }
3700  else if (locus_tag) {
3701  if (is_gene && desc) {
3703  }
3704  }
3705  }
3706 
3707  // gene syn:
3708  if ( gene_ref || okay_to_propage ) {
3709  if (locus) {
3710  if (syn) {
3712  }
3713  } else if (locus_tag) {
3714  if (syn) {
3716  }
3717  } else if (desc) {
3718  if (syn) {
3720  }
3721  } else if (syn) {
3722  CGene_ref::TSyn syns = *syn;
3723  syns.pop_front();
3724  // ... and the rest as synonyms
3725  if (syn) {
3727  }
3728  }
3729  }
3730 
3731  // gene nomenclature
3732  if( gene_ref->IsSetFormal_name() && subtype == CSeqFeatData::eSubtype_gene ) {
3734  }
3735 
3736  // gene allele:
3737  {{
3738  // these bool vars just break up the if-statement to make it easier to understand
3739  const bool is_type_where_allele_from_gene_forbidden = (subtype == CSeqFeatData::eSubtype_variation);
3740  const bool is_type_where_allele_from_gene_forbidden_except_with_embl_or_ddbj =
3742  subtype == CSeqFeatData::eSubtype_centromere ||
3743  subtype == CSeqFeatData::eSubtype_telomere );
3744  const bool is_embl_or_ddbj = ( GetContext()->IsEMBL() || GetContext()->IsDDBJ() );
3745  if ( ! is_type_where_allele_from_gene_forbidden &&
3746  ( is_embl_or_ddbj || ! is_type_where_allele_from_gene_forbidden_except_with_embl_or_ddbj ) )
3747  {
3748  if (gene_ref->IsSetAllele() && !NStr::IsBlank(gene_ref->GetAllele())) {
3751  }
3752  }
3753  }}
3754 
3755  // gene xref:
3756  if (gene_ref->IsSetDb()) {
3757  x_AddQual(eFQ_gene_xref, new CFlatXrefQVal(gene_ref->GetDb()));
3758  }
3759 
3760  // gene db-xref:
3761  switch (m_Feat.GetData().Which()) {
3762  case CSeqFeatData::e_Rna:
3764  if (gene_feat && gene_feat->IsSetDbxref()) {
3765  CSeq_feat::TDbxref xrefs = gene_feat->GetDbxref();
3766  if (m_Feat.IsSetDbxref()) {
3768  for (CSeq_feat::TDbxref::iterator i = xrefs.begin();
3769  i != xrefs.end(); ++i) {
3770  if ((*i)->Equals(**it)) {
3771  xrefs.erase(i);
3772  break;
3773  }
3774  }
3775  }
3776  }
3777  if (xrefs.size()) {
3778  x_AddQual(eFQ_db_xref, new CFlatXrefQVal(xrefs));
3779  }
3780  }
3781  break;
3782 
3783  default:
3784  break;
3785  }
3786 
3787  // gene map:
3788  if (!from_overlap && gene_ref->IsSetMaploc() && subtype == CSeqFeatData::eSubtype_gene) {
3789  x_AddQual(eFQ_gene_map, new CFlatStringQVal(gene_ref->GetMaploc()));
3790  }
3791 
3792  // gene pseudogene qual:
3793 
3794  // inherit pseudogene, if possible
3795  if( gene_feat && ! x_HasQual(eFQ_pseudogene) ) {
3796  const string & strPseudoGene = gene_feat->GetNamedQual("pseudogene");
3797  x_AddQual(eFQ_pseudogene, new CFlatStringQVal(strPseudoGene) );
3798  }
3799 }
3800 
3801 // ----------------------------------------------------------------------------
3804  bool pseudo)
3805 // ----------------------------------------------------------------------------
3806 {
3807  _ASSERT( m_Feat.GetData().IsProt() );
3808 
3809  const CSeqFeatData& data = m_Feat.GetData();
3810  const CProt_ref& pref = data.GetProt();
3811  CProt_ref::TProcessed processed = pref.GetProcessed();
3812 
3813  //cerr << MSerial_AsnText << m_Feat.GetOriginalFeature();
3814 
3815  if ( ctx.IsNuc() || (ctx.IsProt() && !IsMappedFromProt()) ) {
3816  if ( pref.IsSetName() && !pref.GetName().empty() ) {
3817  const CProt_ref::TName& names = pref.GetName();
3818  x_AddQual(eFQ_product, new CFlatStringQVal(names.front()));
3819  if (names.size() > 1) {
3821  }
3822  }
3823  if ( pref.IsSetDesc() && !pref.GetDesc().empty() ) {
3824  if ( !ctx.IsProt() ) {
3825  string desc = pref.GetDesc();
3826  TrimSpacesAndJunkFromEnds(desc, true);
3827  bool add_period = RemovePeriodFromEnd(desc, true);
3828  CRef<CFlatStringQVal> prot_desc(new CFlatStringQVal(desc));
3829  if (add_period) {
3830  prot_desc->SetAddPeriod();
3831  }
3832  x_AddQual(eFQ_prot_desc, prot_desc);
3833 // had_prot_desc = true;
3834  } else {
3836  }
3837  }
3838  if ( pref.IsSetActivity() && !pref.GetActivity().empty() ) {
3839  ITERATE (CProt_ref::TActivity, it, pref.GetActivity()) {
3840  if (!NStr::IsBlank(*it)) {
3842  }
3843  }
3844  }
3845  if (pref.IsSetEc() && !pref.GetEc().empty()) {
3846  ITERATE(CProt_ref::TEc, ec, pref.GetEc()) {
3847  if ( !ctx.Config().DropIllegalQuals() || s_IsLegalECNumber(*ec)) {
3849  }
3850  }
3851  }
3852  if ( m_Feat.IsSetProduct() ) {
3854  ctx.GetScope().GetBioseqHandle( m_Feat.GetProductId() );
3855  if ( prot ) {
3857  } else {
3858  try {
3859  const CSeq_id& prod_id =
3860  GetId( m_Feat.GetProduct(), &ctx.GetScope());
3861  if ( ctx.IsRefSeq() || !ctx.Config().ForGBRelease() ) {
3862  x_AddQual(eFQ_protein_id, new CFlatSeqIdQVal(prod_id));
3863  }
3864  } catch (CObjmgrUtilException&) {}
3865  }
3866  }
3867  } else { // protein feature on subpeptide bioseq
3869  }
3870  if ( !pseudo && ( ctx.Config().ShowPeptides() || ctx.Config().IsFormatGBSeq() || ctx.Config().IsFormatINSDSeq() ) ) {
3871  if ( processed == CProt_ref::eProcessed_mature ||
3872  processed == CProt_ref::eProcessed_signal_peptide ||
3874  processed == CProt_ref::eProcessed_propeptide ) {
3875  CSeqVector pep(m_Feat.GetLocation(), ctx.GetScope());
3877  string peptide;
3878  pep.GetSeqData(pep.begin(), pep.end(), peptide);
3879  if (!NStr::IsBlank(peptide)) {
3880  x_AddQual(eFQ_peptide, new CFlatStringQVal(peptide));
3881  }
3882  }
3883  }
3884 
3885  ///
3886  /// report molecular weights
3887  ///
3888  if (ctx.IsProt() && ( ctx.IsRefSeq() || ctx.Config().IsFormatGBSeq() || ctx.Config().IsFormatINSDSeq() ) && ! IsMappedFromProt() &&
3889  ! ( m_Feat.IsSetPartial() && m_Feat.GetPartial() ) &&
3892  ! pseudo )
3893  {
3894  double wt = 0;
3895  bool has_mat_peptide = false;
3896  bool has_propeptide = false;
3897  bool has_signal_peptide = false;
3898 
3900 
3901  const bool is_pept_whole_loc = loc->IsWhole() ||
3902  ( loc->GetStart(eExtreme_Biological) == 0 &&
3903  loc->GetStop(eExtreme_Biological) == (ctx.GetHandle().GetBioseqLength() - 1) );
3904 
3905  if (processed == CProt_ref::eProcessed_not_set ||
3906  processed == CProt_ref::eProcessed_preprotein )
3907  {
3908  SAnnotSelector sel = ctx.SetAnnotSelector();
3910  for (CFeat_CI feat_it(ctx.GetHandle(), sel); feat_it; ++feat_it) {
3911  bool copy_loc = false;
3912  switch (feat_it->GetData().GetProt().GetProcessed()) {
3915  {{
3916  has_signal_peptide = true;
3917  if ( (feat_it->GetLocation().GetTotalRange().GetFrom() ==
3919  ! feat_it->GetLocation().Equals( m_Feat.GetLocation() ) ) {
3920  loc = loc->Subtract(feat_it->GetLocation(),
3922  nullptr, nullptr);
3923  }
3924  }}
3925  break;
3926 
3928  has_mat_peptide = true;
3929  break;
3930 
3932  has_propeptide = true;
3933  break;
3934 
3935  default:
3936  break;
3937  }
3938 
3939  if (copy_loc) {
3940  /// we need to adjust our location to the end of the signal
3941  /// peptide
3942  CRef<CSeq_loc> l(new CSeq_loc);
3943  loc = l;
3944  l->Assign(m_Feat.GetLocation());
3945  l->SetInt().SetTo
3946  (feat_it->GetLocation().GetTotalRange().GetTo());
3947  }
3948  }
3949  }
3950 
3951  /**
3952  CMolInfo::TCompleteness comp = CMolInfo::eCompleteness_partial;
3953  {{
3954  CConstRef<CMolInfo> molinfo
3955  (sequence::GetMolInfo(ctx.GetHandle()));
3956  if (molinfo) {
3957  comp = molinfo->GetCompleteness();
3958  }
3959  }}
3960  **/
3961 
3963 
3964  bool proteinIsAtLeastMature;
3965  switch( pref.GetProcessed() ) {
3968  proteinIsAtLeastMature = false;
3969  break;
3970  default:
3971  proteinIsAtLeastMature = true;
3972  break;
3973  }
3974 
3975  if ( (!has_mat_peptide || !has_signal_peptide || !has_propeptide) || (proteinIsAtLeastMature) || (!is_pept_whole_loc) ) {
3976  try {
3977  const TGetProteinWeight flags = 0;
3979  ctx.GetScope(), loc, flags);
3980  }
3981  catch (CException&) {
3982  }
3983  }
3984  }
3985 
3986  /// note: we report the weight rounded to the nearest int
3987  if (wt) {
3989  new CFlatIntQVal((int(wt + 0.5))));
3990  }
3991  }
3992 
3993  // cleanup
3994  if ( processed == CProt_ref::eProcessed_signal_peptide ||
3995  processed == CProt_ref::eProcessed_transit_peptide ) {
3996  if ( !ctx.IsRefSeq() ) {
3997  // Only RefSeq allows product on signal or transit peptide
3999  }
4000  }
4001  if ( processed == CProt_ref::eProcessed_preprotein &&
4002  !ctx.IsRefSeq() && !ctx.IsProt() &&
4003  data.GetSubtype() == CSeqFeatData::eSubtype_preprotein ) {
4004  const CFlatStringQVal* product = x_GetStringQual(eFQ_product);
4005  if (product) {
4006  x_AddQual(eFQ_encodes, new CFlatStringQVal("encodes " + product->GetValue()));
4008  }
4009  }
4010 }
4011 
4012 
4013 static void s_ParseParentQual(const CGb_qual& gbqual, list<string>& vals)
4014 {
4015  vals.clear();
4016 
4017  if (!gbqual.IsSetVal() || NStr::IsBlank(gbqual.GetVal())) {
4018  return;
4019  }
4020 
4021  const string& val = gbqual.GetVal();
4022 
4023  if (val.length() > 1 && NStr::StartsWith(val, '(') &&
4024  NStr::EndsWith(val, ')') && val.find(',') != NPOS) {
4025  NStr::Split(val, "(,)", vals, NStr::fSplit_Tokenize);
4026  } else {
4027  vals.push_back(val);
4028  }
4029 
4030  list<string>::iterator it = vals.begin();
4031  while (it != vals.end()) {
4032  if (NStr::IsBlank(*it)) {
4033  it = vals.erase(it);
4034  } else {
4035  ConvertQuotes(*it);
4036  ExpandTildes(*it, eTilde_space);
4037  ++it;
4038  }
4039  }
4040 }
4041 
4042 
4044  const char* m_Name;
4046 
4047  operator string(void) const { return m_Name; }
4048 };
4049 
4050 
4051 static bool s_IsValidDirection(const string& direction) {
4052  return NStr::EqualNocase(direction, "LEFT") ||
4053  NStr::EqualNocase(direction, "RIGHT") ||
4054  NStr::EqualNocase(direction, "BOTH");
4055 }
4056 
4057 
4058 static bool s_IsValidnConsSplice(const string& cons_splice) {
4059  return NStr::EqualNocase(cons_splice, "(5'site:YES, 3'site:YES)") ||
4060  NStr::EqualNocase(cons_splice, "(5'site:YES, 3'site:NO)") ||
4061  NStr::EqualNocase(cons_splice, "(5'site:YES, 3'site:ABSENT)") ||
4062  NStr::EqualNocase(cons_splice, "(5'site:NO, 3'site:YES)") ||
4063  NStr::EqualNocase(cons_splice, "(5'site:NO, 3'site:NO)") ||
4064  NStr::EqualNocase(cons_splice, "(5'site:NO, 3'site:ABSENT)") ||
4065  NStr::EqualNocase(cons_splice, "(5'site:ABSENT, 3'site:YES)") ||
4066  NStr::EqualNocase(cons_splice, "(5'site:ABSENT, 3'site:NO)") ||
4067  NStr::EqualNocase(cons_splice, "(5'site:ABSENT, 3'site:ABSENT)");
4068 }
4069 
4070 // currently just converts PMIDs into links
4071 static void
4072 s_HTMLizeExperimentQual( string &out_new_val, const string &val)
4073 {
4074  static const string kPmid("PMID:");
4075 
4076  // just to make sure
4077  out_new_val.clear();
4078 
4079  // str_pos should generally be considered as holding the first position
4080  // in val that we have not yet processed and copied to out_new_val.
4081  SIZE_TYPE str_pos = 0;
4082  while( str_pos < val.length() ) {
4083 
4084  // find next "PMID:" to process
4085  const SIZE_TYPE pmid_label_pos = val.find( "PMID:", str_pos );
4086  if( pmid_label_pos == NPOS ) {
4087  // no more PMIDs left.
4088  // copy the rest of the string and let's leave
4089  copy( val.begin() + str_pos, val.end(), back_inserter(out_new_val) );
4090  return;
4091  }
4092 
4093  // copy val up to just after "PMID:"
4094  const SIZE_TYPE first_pmid_pos = pmid_label_pos + kPmid.length();
4095  copy( val.begin() + str_pos, val.begin() + first_pmid_pos, back_inserter(out_new_val) );
4096  str_pos = first_pmid_pos;
4097 
4098  // push pmids (with links) onto the output
4099  // we consider the pmids to be numbers separated by one or more spaces and/or commas.
4100  bool first_num = true;
4101  while( str_pos < val.length() ) {
4102  // skip spaces and commas before pmid
4103  const SIZE_TYPE next_pmid_pos = val.find_first_not_of(" ,", str_pos);
4104  if( next_pmid_pos == NPOS || ! isdigit(val[next_pmid_pos]) ) {
4105  break;
4106  }
4107 
4108  // find end of pmid
4109  SIZE_TYPE end_of_pmid_pos = val.find_first_not_of("0123456789", next_pmid_pos );
4110  if( NPOS == end_of_pmid_pos ) {
4111  end_of_pmid_pos = val.length();
4112  }
4113 
4114  // extract the actual pmid
4115  string pmid = val.substr(next_pmid_pos, end_of_pmid_pos - next_pmid_pos );
4116 
4117  // write pmid with link
4118  if( ! first_num ) {
4119  out_new_val += ',';
4120  }
4121  out_new_val += "<a href=\"";
4122  out_new_val += strLinkBasePubmed;
4123  out_new_val += pmid;
4124  out_new_val += "\">";
4125  out_new_val += pmid;
4126  out_new_val += "</a>";
4127  str_pos = end_of_pmid_pos;
4128 
4129  first_num = false;
4130  }
4131  }
4132 }
4133 
4134 // ----------------------------------------------------------------------------
4136  CBioseqContext& ctx )
4137 // ----------------------------------------------------------------------------
4138 {
4140 
4141  typedef SStaticPair<const char*, EFeatureQualifier> TLegalImport;
4142  static const TLegalImport kLegalImports[] = {
4143  // Must be in case-insensitive alphabetical order!
4144 #define DO_IMPORT(x) { #x, eFQ_##x }
4145  DO_IMPORT(allele),
4146  DO_IMPORT(bound_moiety),
4147  DO_IMPORT(circular_RNA),
4148  DO_IMPORT(clone),
4149  DO_IMPORT(codon),
4150  DO_IMPORT(compare),
4151  DO_IMPORT(cons_splice),
4152  DO_IMPORT(cyt_map),
4153  DO_IMPORT(direction),
4154  DO_IMPORT(EC_number),
4155  DO_IMPORT(estimated_length),
4156  DO_IMPORT(evidence),
4157  DO_IMPORT(experiment),
4158  DO_IMPORT(frequency),
4159  DO_IMPORT(function),
4160  DO_IMPORT(gap_type),
4161  DO_IMPORT(gen_map),
4162  DO_IMPORT(inference),
4163  DO_IMPORT(insertion_seq),
4164  DO_IMPORT(label),
4165  DO_IMPORT(linkage_evidence),
4166  DO_IMPORT(map),
4167  DO_IMPORT(mobile_element),
4168  DO_IMPORT(mobile_element_type),
4169  DO_IMPORT(mod_base),
4170  DO_IMPORT(ncRNA_class),
4171  DO_IMPORT(number),
4172  DO_IMPORT(old_locus_tag),
4173  DO_IMPORT(operon),
4174  DO_IMPORT(organism),
4175  DO_IMPORT(PCR_conditions),
4176  DO_IMPORT(phenotype),
4177  DO_IMPORT(product),
4178  DO_IMPORT(pseudogene),
4179  DO_IMPORT(rad_map),
4180  DO_IMPORT(recombination_class),
4181  DO_IMPORT(regulatory_class),
4182  DO_IMPORT(replace),
4183  DO_IMPORT(ribosomal_slippage),
4184  DO_IMPORT(rpt_family),
4185  DO_IMPORT(rpt_type),
4186  DO_IMPORT(rpt_unit),
4187  DO_IMPORT(rpt_unit_range),
4188  DO_IMPORT(rpt_unit_seq),
4189  DO_IMPORT(satellite),
4190  DO_IMPORT(standard_name),
4191  DO_IMPORT(tag_peptide),
4192  DO_IMPORT(trans_splicing),
4193  DO_IMPORT(transposon),
4194  DO_IMPORT(UniProtKB_evidence),
4195  DO_IMPORT(usedin)
4196 #undef DO_IMPORT
4197  };
4199  DEFINE_STATIC_ARRAY_MAP(TLegalImportMap, kLegalImportMap, kLegalImports);
4200 
4201  bool check_qual_syntax = ctx.Config().CheckQualSyntax();
4202 
4203  const bool old_locus_tag_added_elsewhere = x_HasQual(eFQ_old_locus_tag);
4204 
4205  bool first_pseudogene = true;
4206 
4207  vector<string> replace_quals;
4208  const CSeq_feat_Base::TQual & qual = m_Feat.GetQual(); // must store reference since ITERATE macro evaluates 3rd arg multiple times
4209  ITERATE( CSeq_feat::TQual, it, qual ) {
4210  if (!(*it)->IsSetQual() || !(*it)->IsSetVal()) {
4211  continue;
4212  }
4213  const string& val = (*it)->GetVal();
4214 
4215  const char* name = (*it)->GetQual().c_str();
4216  const TLegalImportMap::const_iterator li = kLegalImportMap.find(name);
4218  if ( li != kLegalImportMap.end() ) {
4219  slot = li->second;
4220  } else if (check_qual_syntax) {
4221  continue;
4222  }
4223 
4224  // only certain slot types may have an empty value (e.g. M96433)
4225  switch(slot) {
4226  case eFQ_replace:
4227  case eFQ_pseudogene:
4228  // empty value allowed for these slot types, so we don't check
4229  break;
4230  default:
4231  // empty value forbidden for other slot types
4232  if( val.empty() ) {
4233  continue;
4234  }
4235  break;
4236  }
4237 
4238  switch (slot) {
4239  case eFQ_allele:
4240  // if /allele inherited from gene, suppress allele gbqual on feature
4241  if (x_HasQual(eFQ_gene_allele)) {
4242  continue;
4243  } else {
4244  x_AddQual(slot, new CFlatStringQVal(val,
4246  }
4247  break;
4248  case eFQ_codon:
4249  if ((*it)->IsSetVal() && !NStr::IsBlank(val)) {
4251  }
4252  break;
4253  case eFQ_cons_splice:
4254  if ((*it)->IsSetVal()) {
4255  if (!check_qual_syntax || s_IsValidnConsSplice(val)) {
4256  x_AddQual(slot, new CFlatStringQVal(val));
4257  }
4258  }
4259  break;
4260  case eFQ_direction:
4261  if ((*it)->IsSetVal()) {
4262  if (!check_qual_syntax || s_IsValidDirection(val)) {
4263  x_AddQual(slot, new CFlatNumberQVal(val));
4264  }
4265  }
4266  break;
4267  case eFQ_estimated_length:
4268  case eFQ_mod_base:
4269  case eFQ_number:
4270  if ((*it)->IsSetVal() && !NStr::IsBlank(val)) {
4271  x_AddQual(slot, new CFlatNumberQVal(val));
4272  }
4273  break;
4274  case eFQ_rpt_type:
4275  x_AddRptTypeQual(val, check_qual_syntax);
4276  break;
4277  case eFQ_rpt_unit:
4278  if ((*it)->IsSetVal()) {
4280  }
4281  break;
4282  case eFQ_usedin:
4283  {{
4284  list<string> vals;
4285  s_ParseParentQual(**it, vals);
4286  ITERATE (list<string>, i, vals) {
4288  }
4289  break;
4290  }}
4291  case eFQ_old_locus_tag:
4292  {{
4293  if( ! old_locus_tag_added_elsewhere ) {
4294  list<string> vals;
4295  s_ParseParentQual(**it, vals);
4296  ITERATE (list<string>, i, vals) {
4298  }
4299  }
4300  break;
4301  }}
4302  case eFQ_rpt_family:
4303  if ((*it)->IsSetVal() && !NStr::IsBlank(val)) {
4304  x_AddQual(slot, new CFlatStringQVal(val));
4305  }
4306  break;
4307  case eFQ_label:
4308  x_AddQual(slot, new CFlatLabelQVal(val));
4309  break;
4310  case eFQ_EC_number:
4311  if ((*it)->IsSetVal() &&
4312  ( ! ctx.Config().DropIllegalQuals() || s_IsLegalECNumber(val) ) ) {
4313  x_AddQual(slot, new CFlatStringQVal(val));
4314  }
4315  break;
4316  case eFQ_illegal_qual:
4317  if ( ctx.UsingSeqEntryIndex() && NStr::CompareNocase (name, "transl_except") == 0 ) {
4318  break;
4319  }
4320  x_AddQual(slot, new CFlatIllegalQVal(**it));
4321  break;
4322  case eFQ_product:
4323  if (!x_HasQual(eFQ_product)) {
4324  x_AddQual(slot, new CFlatStringQVal(val));
4325  } else {
4326  const CFlatStringQVal* gene = x_GetStringQual(eFQ_gene);
4327  const string& gene_val =
4328  gene ? gene->GetValue() : kEmptyStr;
4329  const CFlatStringQVal* product = x_GetStringQual(eFQ_product);
4330  const string& product_val =
4331  product ? product->GetValue() : kEmptyStr;
4332  if (val != gene_val && val != product_val) {
4333  if ( ! ctx.Config().CodonRecognizedToNote() ||
4335  NStr::Find(val, "RNA") == NPOS )
4336  {
4338  }
4339  }
4340  }
4341  break;
4342  case eFQ_compare:
4343  {{
4344  list<string> vals;
4345  s_ParseParentQual(**it, vals);
4346  ITERATE (list<string>, i, vals) {
4347  if (!ctx.Config().CheckQualSyntax() ||
4350  }
4351  }
4352  }}
4353  break;
4354  case eFQ_evidence:
4355  {{
4356  if ( val == "EXPERIMENTAL" ) {
4358  } else if ( val == "NOT_EXPERIMENTAL" ) {
4360  }
4361  }}
4362  break;
4363 
4364  case eFQ_rpt_unit_range:
4366  break;
4367 
4368  case eFQ_replace:
4369  {{
4370  string s(val);
4371  if (string::npos == s.find_first_not_of("ACGTUacgtu")) {
4372  NStr::ToLower(s);
4373  NStr::ReplaceInPlace(s, "u", "t");
4374  }
4375  replace_quals.push_back(s);
4376  }}
4377  break;
4378 
4379  case eFQ_operon:
4380  {{
4381  if( ! x_HasQual(eFQ_operon) ) {
4382  x_AddQual(slot, new CFlatStringQVal(val));
4383  }
4384  }}
4385  break;
4386 
4387  case eFQ_experiment:
4388  {{
4389  if( ctx.Config().DoHTML() && ! CommentHasSuspiciousHtml(val) ) {
4390  string new_val;
4391  s_HTMLizeExperimentQual(new_val, val);
4392  x_AddQual(slot, new CFlatStringQVal(new_val));
4393  } else {
4394  x_AddQual(slot, new CFlatStringQVal(val));
4395  }
4396  }}
4397  break;
4398 
4399  case eFQ_clone:
4401  break;
4402 
4403  case eFQ_pseudogene:
4404 
4405  // our pseudogene(s) override(s) any that existed before
4406  if( first_pseudogene ) {
4407  first_pseudogene = false;
4409  }
4410  x_AddQual(slot, new CFlatStringQVal(val));
4411 
4412  break;
4413 
4414  case eFQ_regulatory_class:
4415  x_AddRegulatoryClassQual(val, check_qual_syntax);
4416  break;
4417 
4419  x_AddRecombinationClassQual(val, check_qual_syntax);
4420  break;
4421 
4422  default:
4423  x_AddQual(slot, new CFlatStringQVal(val));
4424  break;
4425  }
4426  }
4427 
4428  if (replace_quals.size()) {
4429  std::sort(replace_quals.begin(), replace_quals.end());
4430  ITERATE (vector<string>, it, replace_quals) {
4432  }
4433  }
4434 
4435  // some "map-related" qual adjustments
4436  if( ctx.Config().HideSpecificGeneMaps() && ! x_HasQual(eFQ_map) ) {
4437  if( x_HasQual(eFQ_cyt_map) ) {
4438  x_AddQual(eFQ_map, x_GetQual(eFQ_cyt_map)->second );
4439  } else if( x_HasQual(eFQ_gen_map) ) {
4440  x_AddQual(eFQ_map, x_GetQual(eFQ_gen_map)->second );
4441  } else if( x_HasQual(eFQ_rad_map) ) {
4442  x_AddQual(eFQ_map, x_GetQual(eFQ_rad_map)->second );
4443  }
4447  }
4448 }
4449 
4450 // ----------------------------------------------------------------------------
4452  const string& rpt_unit )
4453 // ----------------------------------------------------------------------------
4454 {
4455  if (rpt_unit.empty()) {
4456  return;
4457  }
4458 
4459  vector<string> units;
4460 
4461  if (NStr::StartsWith(rpt_unit, '(') && NStr::EndsWith(rpt_unit, ')') &&
4462  NStr::Find(rpt_unit, "(", 1) == NPOS) {
4463  string tmp = rpt_unit.substr(1, rpt_unit.length() - 2);
4464  NStr::Split(tmp, ",", units, 0);
4465  } else {
4466  units.push_back(rpt_unit);
4467  }
4468 
4469  NON_CONST_ITERATE (vector<string>, it, units) {
4470  if (!it->empty()) {
4473  }
4474  }
4475 }
4476 
4477 
4478 // ----------------------------------------------------------------------------
4480  const string& rpt_type,
4481  bool check_qual_syntax )
4482 // ----------------------------------------------------------------------------
4483 {
4484  if (rpt_type.empty()) {
4485  return;
4486  }
4487 
4488  string value( rpt_type );
4490 
4491  vector<string> pieces;
4493 
4494  ITERATE( vector<string>, it, pieces ) {
4495  if ( ! check_qual_syntax || CGb_qual::IsValidRptTypeValue( *it ) ) {
4497  }
4498  }
4499 }
4500 
4501 
4502 static bool s_IsValidRegulatoryClass(const string& type)
4503 {
4504  vector<string> valid_types = CSeqFeatData::GetRegulatoryClassList();
4505 
4506  FOR_EACH_STRING_IN_VECTOR (itr, valid_types) {
4507  string str = *itr;
4508  if (NStr::Equal (str, type)) return true;
4509  }
4510 
4511  return false;
4512 }
4513 
4514 static bool s_IsValidRecombinationClass(const string& type)
4515 {
4516  vector<string> valid_types = CSeqFeatData::GetRecombinationClassList();
4517 
4518  FOR_EACH_STRING_IN_VECTOR (itr, valid_types) {
4519  string str = *itr;
4520  if (NStr::Equal (str, type)) return true;
4521  }
4522 
4523  return false;
4524 }
4525 
4526 // ----------------------------------------------------------------------------
4528  const string& recombination_class,
4529  bool check_qual_syntax
4530 )
4531 // ----------------------------------------------------------------------------
4532 {
4533  if (recombination_class.empty()) {
4534  return;
4535  }
4536 
4537  string recomb_class = recombination_class;
4538 
4539  if (NStr::StartsWith(recomb_class, "other:")) {
4540  NStr::TrimPrefixInPlace(recomb_class, "other:");
4541  NStr::TruncateSpacesInPlace(recomb_class);
4542  }
4543  if ( s_IsValidRecombinationClass( recomb_class ) ) {
4544  x_AddQual( eFQ_recombination_class, new CFlatStringQVal(recomb_class));
4545  } else {
4547  x_AddQual( eFQ_seqfeat_note, new CFlatStringQVal(recomb_class));
4548  }
4549 }
4550 
4551 
4552 // ----------------------------------------------------------------------------
4554  const string& regulatory_class,
4555  bool check_qual_syntax
4556 )
4557 // ----------------------------------------------------------------------------
4558 {
4559  if (regulatory_class.empty()) {
4560  return;
4561  }
4562 
4563  string reg_class = regulatory_class;
4564 
4565  if (NStr::StartsWith(reg_class, "other:")) {
4566  NStr::TrimPrefixInPlace(reg_class, "other:");
4567  NStr::TruncateSpacesInPlace(reg_class);
4568  }
4569  if ( s_IsValidRegulatoryClass( reg_class ) ) {
4571  } else if (NStr::CompareNocase(reg_class, "other") == 0 &&
4572  m_Feat.IsSetComment() && !m_Feat.GetComment().empty()) {
4574  } else {
4576  x_AddQual( eFQ_seqfeat_note, new CFlatStringQVal(reg_class));
4577  }
4578 }
4579 
4580 
4582 {
4583  const CFlatFileConfig& cfg = GetContext()->Config();
4584 
4585  if ( cfg.IsFormatFTable() ) {
4586  ff.SetQuals() = m_FTableQuals;
4587  return;
4588  }
4589 
4590  ff.SetQuals().reserve(m_Quals.Size());
4591  CFlatFeature::TQuals& qvec = ff.SetQuals();
4592 
4593 #define DO_QUAL(x) x_FormatQual(eFQ_##x, #x, qvec)
4594  DO_QUAL(ncRNA_class);
4595  DO_QUAL(regulatory_class);
4596  DO_QUAL(recombination_class);
4597 
4598  DO_QUAL(partial);
4599  DO_QUAL(gene);
4600 
4601  DO_QUAL(locus_tag);
4602  DO_QUAL(old_locus_tag);
4603 
4604  x_FormatQual(eFQ_gene_syn_refseq, "synonym", qvec);
4605  DO_QUAL(gene_syn);
4606 
4607  x_FormatQual(eFQ_gene_allele, "allele", qvec);
4608 
4609  DO_QUAL(operon);
4610 
4611  DO_QUAL(product);
4612 
4613  x_FormatQual(eFQ_prot_EC_number, "EC_number", qvec);
4614  x_FormatQual(eFQ_prot_activity, "function", qvec);
4615 
4616  DO_QUAL(standard_name);
4617  DO_QUAL(coded_by);
4618  DO_QUAL(derived_from);
4619 
4620  x_FormatQual(eFQ_prot_name, "name", qvec);
4621  DO_QUAL(region_name);
4622  DO_QUAL(bond_type);
4623  DO_QUAL(site_type);
4624  DO_QUAL(sec_str_type);
4625  DO_QUAL(heterogen);
4626  DO_QUAL(non_std_residue);
4627 
4628  DO_QUAL(tag_peptide);
4629 
4630  DO_QUAL(evidence);
4631  DO_QUAL(experiment);
4632  DO_QUAL(inference);
4633  DO_QUAL(exception);
4634  DO_QUAL(ribosomal_slippage);
4635  DO_QUAL(trans_splicing);
4636  DO_QUAL(circular_RNA);
4637  DO_QUAL(artificial_location);
4638 
4639  if ( !cfg.GoQualsToNote() ) {
4640  if( cfg.GoQualsEachMerge() ) {
4641  // combine all quals of a given type onto the same qual
4642  x_FormatGOQualCombined(eFQ_go_component, "GO_component", qvec);
4643  x_FormatGOQualCombined(eFQ_go_function, "GO_function", qvec);
4644  x_FormatGOQualCombined(eFQ_go_process, "GO_process", qvec);
4645  } else {
4646  x_FormatQual(eFQ_go_component, "GO_component", qvec);
4647  x_FormatQual(eFQ_go_function, "GO_function", qvec);
4648  x_FormatQual(eFQ_go_process, "GO_process", qvec);
4649  }
4650  }
4651 
4652  DO_QUAL(nomenclature);
4653 
4654  x_FormatNoteQuals(ff);
4655  DO_QUAL(citation);
4656 
4657  DO_QUAL(number);
4658 
4659  DO_QUAL(pseudo);
4660  DO_QUAL(pseudogene);
4661  DO_QUAL(selenocysteine);
4662  DO_QUAL(pyrrolysine);
4663 
4664  DO_QUAL(codon_start);
4665 
4666  DO_QUAL(anticodon);
4667  if ( ! cfg.CodonRecognizedToNote() ) {
4668  DO_QUAL(trna_codons);
4669  }
4670  DO_QUAL(bound_moiety);
4671  DO_QUAL(clone);
4672  DO_QUAL(compare);
4673  // DO_QUAL(cons_splice);
4674  DO_QUAL(direction);
4675  DO_QUAL(function);
4676  DO_QUAL(frequency);
4677  DO_QUAL(EC_number);
4678  x_FormatQual(eFQ_gene_map, "map", qvec);
4679  // In certain modes, cyt_map, gen_map, and rad_map are
4680  // moved to eFQ_gene_map by x_ImportQuals:
4681  DO_QUAL(cyt_map);
4682  DO_QUAL(gen_map);
4683  DO_QUAL(rad_map);
4684  DO_QUAL(estimated_length);
4685  DO_QUAL(gap_type);
4686  DO_QUAL(linkage_evidence);
4687  DO_QUAL(allele);
4688  DO_QUAL(map);
4689  DO_QUAL(mod_base);
4690  DO_QUAL(PCR_conditions);
4691  DO_QUAL(phenotype);
4692  DO_QUAL(rpt_family);
4693  DO_QUAL(rpt_type);
4694  DO_QUAL(rpt_unit);
4695  DO_QUAL(rpt_unit_range);
4696  DO_QUAL(rpt_unit_seq);
4697  DO_QUAL(satellite);
4698  DO_QUAL(mobile_element);
4699  DO_QUAL(mobile_element_type);
4700  DO_QUAL(usedin);
4701 
4702  // extra imports, actually...
4703  x_FormatQual(eFQ_illegal_qual, "illegal", qvec);
4704 
4705  DO_QUAL(replace);
4706 
4707  DO_QUAL(transl_except);
4708  DO_QUAL(transl_table);
4709  DO_QUAL(codon);
4710  DO_QUAL(organism);
4711  DO_QUAL(label);
4712  x_FormatQual(eFQ_cds_product, "product", qvec);
4713  DO_QUAL(UniProtKB_evidence);
4714  DO_QUAL(protein_id);
4715  DO_QUAL(transcript_id);
4716  DO_QUAL(db_xref);
4717  x_FormatQual(eFQ_gene_xref, "db_xref", qvec);
4718  DO_QUAL(mol_wt);
4719  DO_QUAL(calculated_mol_wt);
4720  DO_QUAL(translation);
4721  DO_QUAL(transcription);
4722  DO_QUAL(peptide);
4723 
4724 #undef DO_QUAL
4725 }
4726 
4727 /*
4728 // check if str2 is a sub string of str1
4729 static bool s_IsRedundant(const string& str1, const string& str2)
4730 {
4731  size_t pos = NPOS;
4732  bool whole = false;
4733  for (pos = NStr::Find(str1, str2); pos != NPOS && !whole; pos += str2.length()) {
4734  whole = IsWholeWord(str1, pos);
4735  }
4736  return (pos != NPOS && whole);
4737 }
4738 
4739 
4740 // Remove redundant elements that occur twice or as part of other elements.
4741 static void s_PruneNoteQuals(CFlatFeature::TQuals& qvec)
4742 {
4743  if (qvec.empty()) {
4744  return;
4745  }
4746  CFlatFeature::TQuals::iterator it1 = qvec.begin();
4747  while (it1 != qvec.end()) {
4748  CFlatFeature::TQuals::iterator it2 = it1 + 1;
4749  const string& val1 = (*it1)->GetValue();
4750  while (it2 != qvec.end()) {
4751  const string& val2 = (*it2)->GetValue();
4752  if (s_IsRedundant(val1, val2)) {
4753  it2 = qvec.erase(it2);
4754  } else if (s_IsRedundant(val2, val1)) {
4755  break;
4756  } else {
4757  ++it2;
4758  }
4759  }
4760  if (it2 != qvec.end()) {
4761  it1 = qvec.erase(it1);
4762  } else {
4763  ++it1;
4764  }
4765  }
4766 }
4767 */
4768 
4770 {
4771  const CFlatFileConfig& cfg = GetContext()->Config();
4772  CFlatFeature::TQuals qvec;
4773 
4774 #define DO_NOTE(x) x_FormatNoteQual(eFQ_##x, GetStringOfFeatQual(eFQ_##x), qvec)
4775 #define DO_NOTE_PREPEND_NEWLINE(x) x_FormatNoteQual(eFQ_##x, GetStringOfFeatQual(eFQ_##x), qvec, IFlatQVal::fPrependNewline )
4776  DO_NOTE(transcript_id_note);
4777  DO_NOTE(gene_desc);
4778 
4779  if ( cfg.CodonRecognizedToNote() ) {
4780  DO_NOTE(trna_codons);
4781  }
4782  DO_NOTE(encodes);
4783  DO_NOTE(prot_desc);
4784  DO_NOTE(prot_note);
4785  DO_NOTE(prot_comment);
4786  DO_NOTE(prot_method);
4787  DO_NOTE(maploc);
4788  DO_NOTE(prot_conflict);
4789  DO_NOTE(prot_missing);
4790  DO_NOTE(seqfeat_note);
4791  DO_NOTE(region);
4792 // DO_NOTE(selenocysteine_note);
4793  DO_NOTE(prot_names);
4794  DO_NOTE(bond);
4795  DO_NOTE(site);
4796 // DO_NOTE(rrna_its);
4797  DO_NOTE(xtra_prod_quals);
4798 // DO_NOTE(inference_bad);
4799  DO_NOTE(modelev);
4800 // DO_NOTE(cdd_definition);
4801 // DO_NOTE(tag_peptide);
4802  DO_NOTE_PREPEND_NEWLINE(exception_note);
4803 
4804  string notestr;
4805  string suffix;
4806 // bool add_period = false;
4807  bool add_period = true/*fl*/;
4808 
4809  s_QualVectorToNote(qvec, true, notestr, suffix, add_period);
4810 
4811  if (GetContext()->Config().GoQualsToNote()) {
4812  qvec.clear();
4813  DO_NOTE(go_component);
4814  DO_NOTE(go_function);
4815  DO_NOTE(go_process);
4816  s_QualVectorToNote(qvec, false, notestr, suffix, add_period);
4817  }
4818  s_NoteFinalize(add_period, notestr, ff, eTilde_tilde);
4819 
4820 #undef DO_NOTE
4821 #undef DO_NOTE_PREPEND_NEWLINE
4822 }
4823 
4825 (EFeatureQualifier slot,
4826  const char* name,
4827  CFlatFeature::TQuals& qvec,
4828  IFlatQVal::TFlags flags) const
4829 {
4830  TQCI it = m_Quals.LowerBound(slot);
4831  TQCI end = m_Quals.end();
4832  while (it != end && it->first == slot) {
4833  it->second->Format(qvec, name, *GetContext(), flags);
4834  ++it;
4835  }
4836 }
4837 
4838 
4840 (EFeatureQualifier slot,
4841  const CTempString & name,
4842  CFlatFeature::TQuals& qvec,
4843  IFlatQVal::TFlags flags) const
4844 {
4846 
4847  TQCI it = m_Quals.LowerBound(slot);
4848  TQCI end = m_Quals.end();
4849  while (it != end && it->first == slot) {
4850  it->second->Format(qvec, name, *GetContext(), flags);
4851  ++it;
4852  }
4853 }
4854 
4855 // This produces one qual out of all the GO quals of the given slot, with their
4856 // values concatenated.
4858 (EFeatureQualifier slot,
4859  const CTempString & name,
4860  CFlatFeature::TQuals& qvec,
4861  TQualFlags flags) const
4862 {
4863  // copy all the given quals with that name since we need to sort them
4864  vector<CConstRef<CFlatGoQVal> > goQuals;
4865 
4866  TQCI it = m_Quals.LowerBound(slot);
4867  TQCI end = m_Quals.end();
4868  while (it != end && it->first == slot) {
4869  goQuals.push_back( CConstRef<CFlatGoQVal>( dynamic_cast<const CFlatGoQVal*>( it->second.GetNonNullPointer() ) ) );
4870  ++it;
4871  }
4872 
4873  if( goQuals.empty() ) {
4874  return;
4875  }
4876 
4877  stable_sort( goQuals.begin(), goQuals.end(), CGoQualLessThan() );
4878 
4879  CFlatFeature::TQuals temp_qvec;
4880 
4881  string combined;
4882 
4883 
4884  string::size_type this_part_beginning_text_string_pos = 0;
4885 
4886  // now concatenate their values into the variable "combined"
4887  const string* pLastQualTextString = nullptr;
4888  ITERATE( vector<CConstRef<CFlatGoQVal> >, iter, goQuals ) {
4889 
4890  // Use thisQualTextString to tell when we have consecutive quals with the
4891  // same text string.
4892  const string *pThisQualTextString = &(*iter)->GetTextString();
4893  if (! pThisQualTextString) {
4894  continue;
4895  }
4896 
4897  (*iter)->Format(temp_qvec, name, *GetContext(), flags);
4898 
4899  if(! pLastQualTextString || ! NStr::EqualNocase(*pLastQualTextString, *pThisQualTextString)) {
4900  // normal case: each CFlatGoQVal has its own part
4901  if( ! combined.empty() ) {
4902  combined += "; ";
4903  this_part_beginning_text_string_pos = combined.length() - 1;
4904  }
4905  combined += temp_qvec.back()->GetValue();
4906  } else {
4907  // consecutive CFlatGoQVal with the same text string: merge
4908  // (chop off the part up to and including the text string )
4909  const string & new_value = temp_qvec.back()->GetValue();
4910 
4911  // let text_string_pos point to the part *after* the text string
4912  SIZE_TYPE post_text_string_pos = NStr::FindNoCase( new_value, *pLastQualTextString );
4913  _ASSERT( post_text_string_pos != NPOS );
4914  post_text_string_pos += pLastQualTextString->length();
4915 
4916  // append the new part after the text string, but only
4917  // if it's not a duplicate
4918  string str_to_append = new_value.substr( post_text_string_pos,
4919  (pLastQualTextString->length() - post_text_string_pos) );
4920  if( NStr::Find(combined, str_to_append, this_part_beginning_text_string_pos) == NPOS ) {
4921  combined.append( str_to_append );
4922  }
4923  }
4924 
4925  pLastQualTextString = pThisQualTextString;
4926  }
4927  pLastQualTextString = nullptr; // just to make sure we don't accidentally use it
4928 
4929  // add the final merged CFormatQual
4930  if( ! combined.empty() ) {
4931  const string prefix = " ";
4932  const string suffix = ";";
4933  TFlatQual res(new CFormatQual(name, combined, prefix, suffix, CFormatQual::eQuoted ));
4934  qvec.push_back(res);
4935  }
4936 }
4937 
4939 {
4940  const IFlatQVal* qual = nullptr;
4941  if ( x_HasQual(slot) ) {
4942  qual = m_Quals.Find(slot)->second;
4943  }
4944  return dynamic_cast<const CFlatStringQVal*>(qual);
4945 }
4946 
4947 
4949 {
4950  IFlatQVal* qual = nullptr;
4951  if (x_HasQual(slot)) {
4952  qual = const_cast<IFlatQVal*>(&*m_Quals.Find(slot)->second);
4953  }
4954  return dynamic_cast<CFlatStringListQVal*>(qual);
4955 }
4956 
4958 {
4959  IFlatQVal* qual = nullptr;
4960  if (x_HasQual(slot)) {
4961  qual = const_cast<IFlatQVal*>(&*m_Quals.Find(slot)->second);
4962  }
4963  return dynamic_cast<CFlatProductNamesQVal*>(qual);
4964 }
4965 
4966 // maps each valid mobile_element_type prefix to whether it
4967 // must have more info after the prefix
4970  { "LINE", false },
4971  { "MITE", false },
4972  { "SINE", false },
4973  { "insertion sequence", false },
4974  { "integron", false },
4975  { "non-LTR retrotransposon", false },
4976  { "other", true },
4977  { "retrotransposon", false },
4978  { "transposon", false }
4979 };
4980 
4983 
4984 // returns whether or not it's valid
4985 bool s_ValidateMobileElementType( const string & mobile_element_type_value )
4986 {
4987  if( mobile_element_type_value.empty() ) {
4988  return false;
4989  }
4990 
4991  // if there's a colon, we ignore the part after the colon for testing purposes
4992  string::size_type colon_pos = mobile_element_type_value.find( ':' );
4993 
4994  const string value_before_colon = ( string::npos == colon_pos
4995  ? mobile_element_type_value
4996  : mobile_element_type_value.substr( 0, colon_pos ) );
4997 
4999  sm_MobileElemTypeKeys.find( value_before_colon.c_str() );
5000  if( prefix_info == sm_MobileElemTypeKeys.end() ) {
5001  return false; // prefix not found
5002  }
5003 
5004  // check if info required after prefix (colon plus info, actually)
5005  if( prefix_info->second ) {
5006  if( string::npos == colon_pos ) {
5007  return false; // no additional info supplied, even though required
5008  }
5009  }
5010 
5011  // all tests passed
5012  return true;
5013 }
5014 
5016 {
5017 public:
5018  explicit CInStringPred( const string &comparisonString )
5019  : m_ComparisonString( comparisonString )
5020  {}
5021 
5022  bool operator()( const string &arg ) {
5023  return NStr::Find( m_ComparisonString, arg ) != NPOS;
5024  }
5025 private:
5026  const string &m_ComparisonString;
5027 };
5028 
5030  const CGene_ref* gene_ref )
5031 {
5032  const TGeneSyn* gene_syn =
5033  (gene_ref && gene_ref->IsSetSyn() && !gene_ref->GetSyn().empty() )
5034  ?
5035  &gene_ref->GetSyn()
5036  :
5037  nullptr;
5038  const CBioseqContext& ctx = *GetContext();
5039 
5040  if (ctx.Config().DropIllegalQuals()) {
5042  }
5043 
5045  const CFlatStringQVal* gene = x_GetStringQual(eFQ_gene);
5046  const CFlatStringQVal* prot_desc = x_GetStringQual(eFQ_prot_desc);
5047  const CFlatStringQVal* standard_name = x_GetStringQual(eFQ_standard_name);
5048  const CFlatStringQVal* seqfeat_note = x_GetStringQual(eFQ_seqfeat_note);
5049 
5050  if (gene) {
5051  const string& gene_name = gene->GetValue();
5052 
5053  // /gene same as feature.comment will suppress /note
5054  if (m_Feat.IsSetComment()) {
5055  if (NStr::Equal(gene_name, m_Feat.GetComment())) {
5057  seqfeat_note = nullptr;
5058  }
5059  }
5060 
5061  // remove protein description that equals the gene name, case sensitive
5062  if (prot_desc) {
5063  if (s_StrEqualDisregardFinalPeriod(gene_name, prot_desc->GetValue(), NStr::eCase)) {
5065  prot_desc = nullptr;
5066  }
5067  }
5068 
5069  // remove prot name if equals gene
5070  if (prot_names) {
5071 
5072  CProt_ref::TName::iterator remove_start = prot_names->SetValue().begin();
5073  ++remove_start; // The "++" is because the first one shouldn't be erased since it's used for the product
5074  CProt_ref::TName::iterator new_end =
5075  remove( remove_start, prot_names->SetValue().end(), gene_name );
5076  prot_names->SetValue().erase( new_end, prot_names->SetValue().end() );
5077 
5078  if (prot_names->GetValue().empty()) {
5080  prot_names = nullptr;
5081  }
5082  }
5083  }
5084 
5085  if (prot_desc) {
5086  const string& pdesc = prot_desc->GetValue();
5087 
5088  // remove prot name if in prot_desc
5089  if (prot_names) {
5090  CProt_ref::TName::iterator remove_start = prot_names->SetValue().begin();
5091  ++remove_start; // The "++" is because the first one shouldn't be erased since it's used for the product
5092  CProt_ref::TName::iterator new_end =
5093  remove_if( remove_start, prot_names->SetValue().end(),
5094  CInStringPred(pdesc) );
5095  prot_names->SetValue().erase( new_end, prot_names->SetValue().end() );
5096 
5097  if (prot_names->GetValue().empty()) {
5099  prot_names = nullptr;
5100  }
5101  }
5102  // remove protein description that equals the cds product, case sensitive
5103  const CFlatStringQVal* cds_prod = x_GetStringQual(eFQ_cds_product);
5104  if (cds_prod) {
5105  if (NStr::Equal(pdesc, cds_prod->GetValue())) {
5107  prot_desc = nullptr;
5108  }
5109  }
5110 
5111  // remove protein description that equals the standard name
5112  if (prot_desc && standard_name) {
5113  // We use s_StrEqualDisregardFinalPeriod rather than plain NStr::EqualNoCase
5114  // because of, e.g., CU638784
5115  if (s_StrEqualDisregardFinalPeriod(pdesc, standard_name->GetValue(), NStr::eNocase )) {
5117  prot_desc = nullptr;
5118  }
5119  }
5120 
5121  // remove protein description that equals a gene synonym
5122  // NC_001823 leave in prot_desc if no cds_product
5123  if (prot_desc && gene_syn && cds_prod) {
5124  ITERATE (TGeneSyn, it, *gene_syn) {
5125  if (!NStr::IsBlank(*it) && pdesc == *it) {
5127  prot_desc = nullptr;
5128  break;
5129  }
5130  }
5131  }
5132  }
5133 
5134  // check if need to remove seqfeat_note
5135  // (This generally occurs when it's equal to (or, sometimes, contained in) another qual
5136  if (m_Feat.IsSetComment()) {
5137  const string &feat_comment = m_Feat.GetComment();
5138  const CFlatStringQVal* product = x_GetStringQual(eFQ_product);
5139  const CFlatStringQVal* cds_product = x_GetStringQual(eFQ_cds_product);
5140 
5141  if (product) {
5142  if (NStr::EqualNocase(product->GetValue(), feat_comment)) {
5144  seqfeat_note = nullptr;
5145  }
5146  }
5147  if (cds_product && seqfeat_note) {
5148  if ( s_StrEqualDisregardFinalPeriod(cds_product->GetValue(), seqfeat_note->GetValue(), NStr::eCase ) ) {
5150  seqfeat_note = nullptr;
5151  }
5152  }
5153  // suppress selenocysteine note if already in comment
5154 // if (NStr::Find(feat_comment, "selenocysteine") != NPOS) {
5155 // x_RemoveQuals(eFQ_selenocysteine_note);
5156 // }
5157 
5158  // /EC_number same as feat.comment will suppress /note
5159  if (seqfeat_note) {
5160  for (TQCI it = x_GetQual(eFQ_EC_number); it != m_Quals.end() && it->first == eFQ_EC_number; ++it) {
5161  const CFlatStringQVal* ec = dynamic_cast<const CFlatStringQVal*>(it->second.GetPointerOrNull());
5162  if (ec) {
5163  if (NStr::EqualNocase(seqfeat_note->GetValue(), ec->GetValue())) {
5165  seqfeat_note = nullptr;
5166  break;
5167  }
5168  }
5169  }
5170  }
5171 
5172  // this sort of note provides no additional info (we already know this is a tRNA by other places)
5173  if( feat_comment == "tRNA-" ) {
5175  seqfeat_note = nullptr;
5176  }
5177  }
5178 
5180  if (note && standard_name) {
5181  if (NStr::Equal(note->GetValue(), standard_name->GetValue())) {
5183  note = nullptr;
5184  }
5185  }
5186  if (! ctx.IsProt() && note && gene_syn) {
5187  ITERATE (TGeneSyn, it, *gene_syn) {
5188  if (NStr::EqualNocase(note->GetValue(), *it)) {
5190  note = nullptr;
5191  break;
5192  }
5193  }
5194  }
5195  if (note && prot_desc) { // e.g. L07143, U28372
5196  if( NStr::Find(prot_desc->GetValue(), note->GetValue()) != NPOS ) {
5198  note = nullptr;
5199  }
5200  }
5201 
5202  // if there is a prot_desc, then we don't add a period to seqfeat_note
5203  // (Obviously, this part must come after the part that cleans up
5204  // the prot_descs, otherwise we may think we have a prot_desc, when the
5205  // prot_desc is actually to be removed )
5206  if (note && x_GetStringQual(eFQ_prot_desc)) {
5207  const_cast<CFlatStringQVal*>(note)->SetAddPeriod( false );
5208  }
5209 
5210  // hide invalid mobile_element_quals
5211  if( ctx.Config().IsModeRelease() || ctx.Config().IsModeEntrez() ) {
5212 
5213  const CFlatStringQVal *mobile_element_type = x_GetStringQual( eFQ_mobile_element_type );
5214  if (mobile_element_type && ! s_ValidateMobileElementType(mobile_element_type->GetValue())) {
5216  }
5217 
5218  }
5219 
5220  // remove invalid pseudogenes:
5221  {
5222  TQI pseudogene_iter = m_Quals.Find(eFQ_pseudogene);
5223  while( pseudogene_iter != m_Quals.end() &&
5224  pseudogene_iter->first == eFQ_pseudogene )
5225  {
5226  const CFlatStringQVal & qual = dynamic_cast<const CFlatStringQVal &>( *pseudogene_iter->second );
5227  if( s_IsValidPseudoGene(GetContext()->Config().GetMode(), qual.GetValue() ) ) {
5228  // keep valid pseudogene
5229  ++pseudogene_iter;
5230  } else {
5231  // erase invalid pseudogene
5232  TQI pseudogene_iter_to_erase = pseudogene_iter;
5233  ++pseudogene_iter;
5234 
5235  m_Quals.Erase(pseudogene_iter_to_erase);
5236  }
5237  }
5238  }
5239 
5240  // /pseudogene qual suppresses /pseudo qual if /pseudogene fits certain patterns
5241  if( // ( GetContext()->Config().IsModeRelease() || GetContext()->Config().IsModeEntrez() ) &&
5243  {
5245  // in this part, always use release-mode validation logic, regardless of actual mode
5246  if( qval && s_IsValidPseudoGene( CFlatFileConfig::eMode_Release, qval->GetValue() ) ) {
5248  }
5249  }
5250 }
5251 
5252 
5254 static const TQualPair sc_GbToFeatQualMap[] = {
5350 // { eFQ_selenocysteine, CSeqFeatData::eQual_note },
5351 // { eFQ_selenocysteine_note, CSeqFeatData::eQual_note },
5369 };
5372 
5374 {
5375  TQualMap::const_iterator it = sc_QualMap.find(qual);
5376  if ( it != sc_QualMap.end() ) {
5377  return it->second;
5378  }
5379  return CSeqFeatData::eQual_bad;
5380 }
5381 
5382 
5384 {
5385  const CSeqFeatData& data = m_Feat.GetData();
5386 
5387  TQI it = m_Quals.begin();
5388  while ( it != m_Quals.end() ) {
5389  CSeqFeatData::EQualifier qual = s_GbToSeqFeatQual(it->first);
5390  if ( !data.IsLegalQualifier(qual) ) {
5391  it = m_Quals.Erase(it);
5392  } else {
5393  ++it;
5394  }
5395  }
5396 }
5397 
5399 {
5400  const CSeqFeatData& data = m_Feat.GetData();
5401  return data.IsLegalQualifier(qual);
5402 }
5403 
5404 // ----------------------------------------------------------------------------
5406  CBioseqContext& ctx )
5407 // ----------------------------------------------------------------------------
5408 {
5409  bool pseudo = m_Feat.IsSetPseudo() && m_Feat.GetPseudo();
5410 
5411  const CSeqFeatData& data = m_Feat.GetData();
5412 
5413  switch ( m_Feat.GetData().Which() ) {
5414  case CSeqFeatData::e_Gene:
5415  pseudo |= x_AddFTableGeneQuals(data.GetGene());
5416  break;
5417  case CSeqFeatData::e_Rna:
5419  break;
5422  break;
5423  case CSeqFeatData::e_Prot:
5425  break;
5427  x_AddFTableRegionQuals(data.GetRegion());
5428  break;
5429  case CSeqFeatData::e_Bond:
5430  x_AddFTableBondQuals(data.GetBond());
5431  break;
5432  case CSeqFeatData::e_Site:
5433  x_AddFTableSiteQuals(data.GetSite());
5434  break;
5436  x_AddFTablePsecStrQuals(data.GetPsec_str());
5437  break;
5439  x_AddFTableNonStdQuals(data.GetNon_std_residue());
5440  break;
5441  case CSeqFeatData::e_Het:
5442  x_AddFTablePsecStrQuals(data.GetHet());
5443  break;
5445  x_AddFTableBiosrcQuals(data.GetBiosrc());
5446  break;
5447  default:
5448  break;
5449  }
5450  if ( pseudo ) {
5451  x_AddFTableQual("pseudo");
5452  }
5453  const CGene_ref* grp = m_Feat.GetGeneXref();
5454  if (grp) {
5455  string gene_label;
5456  if (grp->IsSuppressed()) {
5457  gene_label = "-";
5458  } else {
5459  grp->GetLabel(&gene_label);
5460  }
5461  x_AddFTableQual("gene", gene_label);
5462  }
5463  if ( m_Feat.IsSetComment() && !m_Feat.GetComment().empty() ) {
5464  x_AddFTableQual("note", m_Feat.GetComment());
5465  }
5466  if ( m_Feat.IsSetExp_ev() ) {
5467  string ev;
5468  switch ( m_Feat.GetExp_ev() ) {
5470  ev = "experimental";
5471  break;
5473  ev = "not_experimental";
5474  break;
5475  }
5476  x_AddFTableQual("evidence", ev);
5477  }
5478  if ( m_Feat.IsSetExcept_text() && !m_Feat.GetExcept_text().empty() ) {
5479  string exception_text = m_Feat.GetExcept_text();
5480  if ( exception_text == "ribosomal slippage" ) {
5481  x_AddFTableQual("ribosomal_slippage");
5482  }
5483  else if ( exception_text == "trans-splicing" ) {
5484  x_AddFTableQual("trans_splicing");
5485  }
5486  else if ( exception_text == "circular RNA" ) {
5487  x_AddFTableQual("circular_RNA");
5488  }
5489  x_AddFTableQual("exception", m_Feat.GetExcept_text());
5490  } else if ( m_Feat.IsSetExcept() && m_Feat.GetExcept() ) {
5491  x_AddFTableQual("exception");
5492  }
5493  const CSeq_feat_Base::TQual & qual = m_Feat.GetQual(); // must store reference since ITERATE macro evaluates 3rd arg multiple times
5494  const bool hide_ids = GetContext()->Config().HideProteinID();
5495  ITERATE( CSeq_feat::TQual, it, qual ) {
5496  const CGb_qual& qual = **it;
5497  const string& key = qual.IsSetQual() ? qual.GetQual() : kEmptyStr;
5498  const string& val = qual.IsSetVal() ? qual.GetVal() : kEmptyStr;
5499  if ( !key.empty() && !val.empty() ) {
5500  if (hide_ids &&
5501  (key == "protein_id" ||
5502  key == "orig_protein_id" ||
5503  key == "transcript_id" ||
5504  key == "orig_transcript_id"))
5505  {
5506  continue;
5507  }
5509  }
5510  }
5511  if ( m_Feat.IsSetExt() ) {
5513  }
5514  if ( data.IsGene() ) {
5515  x_AddFTableDbxref(data.GetGene().GetDb());
5516  } else if ( data.IsProt() ) {
5517  x_AddFTableDbxref(data.GetProt().GetDb());
5518  }
5520 }
5521 
5522 // ----------------------------------------------------------------------------
5524  const CSeq_feat::TExt& ext )
5525 // ----------------------------------------------------------------------------
5526 {
5527  ITERATE (CUser_object::TData, it, ext.GetData()) {
5528  const CUser_field& field = **it;
5529  if ( !field.IsSetData() ) {
5530  continue;
5531  }
5532  if ( field.GetData().IsObject() ) {
5533  const CUser_object& obj = field.GetData().GetObject();
5534  x_AddQualsExt(obj);
5535  return;
5536  } else if ( field.GetData().IsObjects() ) {
5538  x_AddQualsExt(**o);
5539  }
5540  return;
5541  }
5542  }
5543  if ( ext.IsSetType() && ext.GetType().IsStr() ) {
5544  const string& oid = ext.GetType().GetStr();
5545  if ( oid == "GeneOntology" ) {
5546  ITERATE (CUser_object::TData, uf_it, ext.GetData()) {
5547  const CUser_field& field = **uf_it;
5548  if ( field.IsSetLabel() && field.GetLabel().IsStr() ) {
5549  const string& label = field.GetLabel().GetStr();
5550  string name;
5551  if ( label == "Process" ) {
5552  name = "GO_process";
5553  } else if ( label == "Component" ) {
5554  name = "GO_component";
5555  } else if ( label == "Function" ) {
5556  name = "GO_function";
5557  }
5558  if ( name.empty() ) {
5559  continue;
5560  }
5561 
5563  if ( (*it)->GetData().IsFields() ) {
5564  CFlatGoQVal(**it).Format(m_FTableQuals, name, *GetContext(), 0);;
5565  }
5566  }
5567  }
5568  }
5569  }
5570  }
5571 }
5572 
5573 // ----------------------------------------------------------------------------
5575  const CSeq_feat::TDbxref& dbxref )
5576 // ----------------------------------------------------------------------------
5577 {
5578  ITERATE (CSeq_feat::TDbxref, it, dbxref) {
5579  const CDbtag& dbt = **it;
5580  if ( dbt.IsSetDb() && !dbt.GetDb().empty() &&
5581  dbt.IsSetTag() ) {
5582  const CObject_id& oid = dbt.GetTag();
5583  switch ( oid.Which() ) {
5584  case CObject_id::e_Str:
5585  if ( !oid.GetStr().empty() ) {
5586  x_AddFTableQual("db_xref", dbt.GetDb() + ":" + oid.GetStr());
5587  }
5588  break;
5589  case CObject_id::e_Id:
5590  x_AddFTableQual("db_xref", dbt.GetDb() + ":" + NStr::IntToString(oid.GetId()));
5591  break;
5592  default:
5593  break;
5594  }
5595  }
5596  }
5597 }
5598 
5599 // ----------------------------------------------------------------------------
5601  const CGene_ref& gene )
5602 // ----------------------------------------------------------------------------
5603 {
5604  if ( gene.IsSetLocus() && !gene.GetLocus().empty() ) {
5606  }
5607  if ( gene.IsSetAllele() && !gene.GetAllele().empty() ) {
5608  x_AddFTableQual("allele", gene.GetAllele());
5609  }
5610  ITERATE (CGene_ref::TSyn, it, gene.GetSyn()) {
5612  }
5613  if ( gene.IsSetDesc() && !gene.GetDesc().empty() ) {
5614  x_AddFTableQual("gene_desc", gene.GetDesc());
5615  }
5616  if ( gene.IsSetMaploc() && !gene.GetMaploc().empty() ) {
5617  x_AddFTableQual("map", gene.GetMaploc());
5618  }
5619  if ( gene.IsSetLocus_tag() && !gene.GetLocus_tag().empty() ) {
5621  }
5622 
5623  return (gene.IsSetPseudo() && gene.GetPseudo());
5624 }
5625 
5626 
5628  const CTrna_ext& trna_ext,
5630 {
5631 
5632 
5633  if (!trna_ext.IsSetAnticodon()) {
5634  return;
5635  }
5636 
5637  const auto& loc = trna_ext.GetAnticodon();
5638  string pos = CFlatSeqLoc(loc, ctx).GetString();
5639 
5640  string aa;
5641  switch(trna_ext.GetAa().Which()) {
5643  aa = GetAAName(trna_ext.GetAa().GetIupacaa(), true);
5644  break;
5646  aa = GetAAName(trna_ext.GetAa().GetNcbieaa(), true);
5647  break;
5649  aa = GetAAName(trna_ext.GetAa().GetNcbi8aa(), false);
5650  break;
5652  aa = GetAAName(trna_ext.GetAa().GetNcbistdaa(), false);
5653  break;
5654  default:
5655  break;
5656  }
5657 
5658  string seq("---");
5659  try {
5660  CSeqVector seq_vec(loc, ctx.GetScope(), CBioseq_Handle::eCoding_Iupac);
5661  seq_vec.GetSeqData(0, 3, seq);
5662  NStr::ToLower(seq);
5663  }
5664  catch(...)
5665  {}
5666 
5667 
5668  x_AddFTableQual("anticodon", "(pos:" + pos + ",aa:" + aa + ",seq:" + seq + ")");
5669 
5670 }
5671 
5672 // ----------------------------------------------------------------------------
5674  const CMappedFeat& feat,
5675  CBioseqContext& ctx )
5676 // ----------------------------------------------------------------------------
5677 {
5678  string label;
5679 
5680  if ( !feat.GetData().IsRna() ) {
5681  return;
5682  }
5683  const CFlatFileConfig& cfg = GetContext()->Config();
5684  const CSeqFeatData::TRna& rna = feat.GetData().GetRna();
5685  if (rna.IsSetExt()) {
5686  const CRNA_ref::TExt& ext = rna.GetExt();
5687  if (ext.IsName()) {
5688  if (!ext.GetName().empty()) {
5689  x_AddFTableQual("product", ext.GetName());
5690  }
5691  } else if (ext.IsTRNA()) {
5693  feature::fFGL_Content, &ctx.GetScope());
5694  x_AddFTableQual("product", label);
5695  // check for anticodon
5697  }
5698  else if ( ext.IsGen() ) {
5699  const CRNA_gen& gen = ext.GetGen();
5700  if ( gen.IsSetClass() ) {
5701  if ( gen.IsLegalClass()) {
5702  x_AddFTableQual("ncRNA_class", gen.GetClass());
5703  }
5704  else {
5705  x_AddFTableQual("ncRNA_class", "other");
5706  x_AddFTableQual("note", gen.GetClass());
5707  }
5708  }
5709 
5710  if ( gen.IsSetProduct() ) {
5711  x_AddFTableQual("product", gen.GetProduct());
5712  }
5713  }
5714  }
5715 
5716  if ( feat.IsSetProduct() && !cfg.HideProteinID()) {
5717  CBioseq_Handle prod =
5718  ctx.GetScope().GetBioseqHandle(m_Feat.GetProductId());
5719  if ( prod ) {
5720  string id_str = x_SeqIdWriteForTable(*(prod.GetBioseqCore()), ctx.Config().SuppressLocalId(),
5721  !(ctx.Config().HideGI() || ctx.Config().IsPolicyFtp() || ctx.Config().IsPolicyGenomes()));
5722  if (!NStr::IsBlank(id_str)) {
5723  x_AddFTableQual("transcript_id", id_str);
5724  }
5725  }
5726  }
5727 }
5728 
5729 
5730 // originally SeqIdWriteForTable in the C Toolkit
5731 // specific Seq-ids are included in the value, in a specific order
5732 string CFeatureItem::x_SeqIdWriteForTable(const CBioseq& seq, bool suppress_local, bool giOK)
5733 
5734 {
5735  if (!seq.IsSetId()) {
5736  return kEmptyStr;
5737  }
5738  const CSeq_id* accn = nullptr;
5739  const CSeq_id* local = nullptr;
5740  const CSeq_id* general = nullptr;
5741  const CSeq_id* gi = nullptr;
5742 
5743  ITERATE(CBioseq::TId, it, seq.GetId()) {
5744  switch ((*it)->Which()) {
5745  case CSeq_id::e_Local:
5746  local = it->GetPointer();
5747  break;
5748  case CSeq_id::e_Genbank:
5749  case CSeq_id::e_Embl:
5750  case CSeq_id::e_Pir:
5751  case CSeq_id::e_Swissprot:
5752  case CSeq_id::e_Ddbj:
5753  case CSeq_id::e_Prf:
5754  case CSeq_id::e_Tpg:
5755  case CSeq_id::e_Tpe:
5756  case CSeq_id::e_Tpd:
5757  case CSeq_id::e_Other:
5758  case CSeq_id::e_Gpipe:
5759  accn = it->GetPointer();
5760  break;
5761  case CSeq_id::e_General:
5762  if (!(*it)->GetGeneral().IsSkippable()) {
5763  general = it->GetPointer();
5764  }
5765  break;
5766  case CSeq_id::e_Gi:
5767  gi = it->GetPointer();
5768  break;
5769  default:
5770  break;
5771  }
5772  }
5773 
5774  string label;
5775 
5776  if (accn) {
5777  label = accn->AsFastaString();
5778  }
5779 
5780  if (general) {
5781  if (!label.empty()) {
5782  label += "|";
5783  }
5784  label += general->AsFastaString();
5785  }
5786 
5787  if (local && (! suppress_local) && label.empty()) {
5788  label = local->AsFastaString();
5789  }
5790 
5791  if (gi && giOK && label.empty()) {
5792  label = gi->AsFastaString();
5793  }
5794 
5795  return label;
5796 }
5797 
5798 
5799 // ----------------------------------------------------------------------------
5801  const CMappedFeat& feat,
5802  CBioseqContext& ctx )
5803 // ----------------------------------------------------------------------------
5804 {
5805  CBioseq_Handle prod;
5806  const CFlatFileConfig& cfg = GetContext()->Config();
5807  if ( feat.IsSetProduct() ) {
5808  prod = ctx.GetScope().GetBioseqHandle(feat.GetProductId());
5809  }
5810 
5811  const CProt_ref* prot_xref = feat.GetProtXref();
5812  if (prot_xref) {
5813  x_AddFTableProtQuals(*prot_xref);
5814  }
5815  else
5816  if ( prod ) {
5817  CMappedFeat prot_ref = s_GetBestProtFeature(prod);
5818  if ( prot_ref ) {
5819  /// FIXME: we take the first; we want the longest
5820  x_AddFTableProtQuals(prot_ref);
5821  }
5822  }
5823  const CCdregion& cdr = feat.GetData().GetCdregion();
5824  if ( cdr.IsSetFrame() && cdr.GetFrame() > CCdregion::eFrame_one ) {
5825  x_AddFTableQual("codon_start", NStr::IntToString(cdr.GetFrame()));
5826  }
5828  string pos = CFlatSeqLoc((*it)->GetLoc(), ctx).GetString();
5829  string aa = "OTHER";
5830  switch ((*it)->GetAa().Which()) {
5832  aa = GetAAName((*it)->GetAa().GetNcbieaa(), true);
5833  break;
5835  aa = GetAAName((*it)->GetAa().GetNcbi8aa(), false);
5836  break;
5838  aa = GetAAName((*it)->GetAa().GetNcbistdaa(), false);
5839  break;
5840  default:
5841  break;
5842  }
5843  x_AddFTableQual("transl_except", "(pos:" + pos + ",aa:" + aa + ")");
5844  }
5845 
5846  if (cdr.IsSetCode()) {
5847  int gcode = cdr.GetCode().GetId();
5848  if (gcode > 1 && gcode != 255) {
5849  x_AddFTableQual("transl_table", NStr::NumericToString(gcode));
5850  }
5851  }
5852 
5853  if (prod && !cfg.HideProteinID()) {
5854  string id_str = x_SeqIdWriteForTable(*(prod.GetBioseqCore()), ctx.Config().SuppressLocalId(),
5855  !(ctx.Config().HideGI() || ctx.Config().IsPolicyFtp() || ctx.Config().IsPolicyGenomes()));
5856  if (!NStr::IsBlank(id_str)) {
5857  x_AddFTableQual("protein_id", id_str);
5858  }
5859  }
5860 }
5861 
// Adds feature-table protein qualifiers for a mapped protein feature.
// Returns without emitting anything unless the feature's data is a Prot.
// Otherwise the Prot data is handed to x_AddFTableProtQuals and, when the
// feature carries a non-empty comment, that comment is emitted as "prot_note".
// NOTE(review): the declarator line naming this function is missing from this listing.
5862 // ----------------------------------------------------------------------------
5864  const CMappedFeat& prot )
5865 // ----------------------------------------------------------------------------
5866 {
5867  if ( !prot.GetData().IsProt() ) {
5868  return;
5869  }
5870  x_AddFTableProtQuals(prot.GetData().GetProt());
5871 
5872  if ( prot.IsSetComment() && !prot.GetComment().empty() ) {
5873  x_AddFTableQual("prot_note", prot.GetComment());
5874  }
5875 }
5876 
// Adds feature-table qualifiers from a CProt_ref.
// Emits, skipping empty strings throughout:
//   "product"   - once per entry of GetName()
//   "prot_desc" - the description, when it is set
//   "function"  - once per entry of GetActivity()
//   "EC_number" - once per entry of GetEc()
// NOTE(review): the declarator line naming this function is missing from this listing.
5877 // ----------------------------------------------------------------------------
5879  const CProt_ref& prot_ref)
5880 // ----------------------------------------------------------------------------
5881 {
5882  ITERATE (CProt_ref::TName, it, prot_ref.GetName()) {
5883  if ( !it->empty() ) {
5884  x_AddFTableQual("product", *it);
5885  }
5886  }
5887  if ( prot_ref.IsSetDesc() && !prot_ref.GetDesc().empty() ) {
5888  x_AddFTableQual("prot_desc", prot_ref.GetDesc());
5889  }
5890  ITERATE (CProt_ref::TActivity, it, prot_ref.GetActivity()) {
5891  if ( !it->empty() ) {
5892  x_AddFTableQual("function", *it);
5893  }
5894  }
5895  ITERATE (CProt_ref::TEc, it, prot_ref.GetEc()) {
5896  if ( !it->empty() ) {
5897  x_AddFTableQual("EC_number", *it);
5898  }
5899  }
5900 }
5901 
// Emits the region string as a "region" feature-table qualifier; an empty
// region produces no qualifier.
// NOTE(review): the declarator line naming this function is missing from this listing.
5902 // ----------------------------------------------------------------------------
5904  const CSeqFeatData::TRegion& region )
5905 // ----------------------------------------------------------------------------
5906 {
5907  if ( !region.empty() ) {
5908  x_AddFTableQual("region", region);
5909  }
5910 }
5911 
// Emits a "bond_type" feature-table qualifier whose value is whatever
// s_GetBondName() returns for the given bond; no emptiness check is made here.
// NOTE(review): the declarator line naming this function is missing from this listing.
5912 // ----------------------------------------------------------------------------
5914  const CSeqFeatData::TBond& bond )
5915 // ----------------------------------------------------------------------------
5916 {
5917  x_AddFTableQual("bond_type", s_GetBondName(bond));
5918 }
5919 
// Emits a "site_type" feature-table qualifier whose value is whatever
// s_GetSiteName() returns for the given site; no emptiness check is made here.
// NOTE(review): the declarator line naming this function is missing from this listing.
5920 // ----------------------------------------------------------------------------
5922  const CSeqFeatData::TSite& site)
5923 // ----------------------------------------------------------------------------
5924 {
5925  x_AddFTableQual("site_type", s_GetSiteName(site));
5926 }
5927 
// Emits a "sec_str_type" feature-table qualifier.  The value is the name the
// EPsec_str enum description returns from FindName() for psec_str.
// NOTE(review): the second FindName() argument (true) presumably tolerates
// values with no registered name - confirm against the enum-info API.
// NOTE(review): the declarator line naming this function is missing from this listing.
5928 // ----------------------------------------------------------------------------
5930  const CSeqFeatData::TPsec_str& psec_str )
5931 // ----------------------------------------------------------------------------
5932 {
5933  const string& psec = CSeqFeatData::ENUM_METHOD_NAME(EPsec_str)()->FindName(
5934  psec_str, true );
5935  x_AddFTableQual("sec_str_type", psec);
5936 }
5937 
// Emits the heterogen string as a "heterogen" feature-table qualifier; an
// empty string produces no qualifier.
// NOTE(review): the declarator line naming this function is missing from this listing.
5938 // ----------------------------------------------------------------------------
5940  const CSeqFeatData::THet& het)
5941 // ----------------------------------------------------------------------------
5942 {
5943  if ( !het.Get().empty() ) {
5944  x_AddFTableQual("heterogen", het.Get());
5945  }
5946 }
5947 
// Emits the residue string as a "non_std_residue" feature-table qualifier; an
// empty string produces no qualifier.
// NOTE(review): the declarator line naming this function is missing from this listing.
5948 // ----------------------------------------------------------------------------
5950  const CSeqFeatData::TNon_std_residue& res )
5951 // ----------------------------------------------------------------------------
5952 {
5953  if ( !res.empty() ) {
5954  x_AddFTableQual("non_std_residue", res);
5955  }
5956 }
5957 
5958 
5959 static const string s_GetSubtypeString(const COrgMod::TSubtype& subtype)
5960 {
5961  switch ( subtype ) {
5962  case COrgMod::eSubtype_strain: return "strain";
5963  case COrgMod::eSubtype_substrain: return "substrain";
5964  case COrgMod::eSubtype_type: return "type";
5965  case COrgMod::eSubtype_subtype: return "subtype";
5966  case COrgMod::eSubtype_variety: return "variety";
5967  case COrgMod::eSubtype_serotype: return "serotype";
5968  case COrgMod::eSubtype_serogroup: return "serogroup";
5969  case COrgMod::eSubtype_serovar: return "serovar";
5970  case COrgMod::eSubtype_cultivar: return "cultivar";
5971  case COrgMod::eSubtype_pathovar: return "pathovar";
5972  case COrgMod::eSubtype_chemovar: return "chemovar";
5973  case COrgMod::eSubtype_biovar: return "biovar";
5974  case COrgMod::eSubtype_biotype: return "biotype";
5975  case COrgMod::eSubtype_group: return "group";
5976  case COrgMod::eSubtype_subgroup: return "subgroup";
5977  case COrgMod::eSubtype_isolate: return "isolate";
5978  case COrgMod::eSubtype_common: return "common";
5979  case COrgMod::eSubtype_acronym: return "acronym";
5980  case COrgMod::eSubtype_dosage: return "dosage";
5981  case COrgMod::eSubtype_nat_host: return "nat_host";
5982  case COrgMod::eSubtype_sub_species: return "sub_species";
5983  case COrgMod::eSubtype_specimen_voucher: return "specimen_voucher";
5984  case COrgMod::eSubtype_authority: return "authority";
5985  case COrgMod::eSubtype_forma: return "forma";
5986  case COrgMod::eSubtype_forma_specialis: return "dosage";
5987  case COrgMod::eSubtype_ecotype: return "ecotype";
5988  case COrgMod::eSubtype_synonym: return "synonym";
5989  case COrgMod::eSubtype_anamorph: return "anamorph";
5990  case COrgMod::eSubtype_teleomorph: return "teleomorph";
5991  case COrgMod::eSubtype_breed: return "breed";
5992  case COrgMod::eSubtype_gb_acronym: return "gb_acronym";
5993  case COrgMod::eSubtype_gb_anamorph: return "gb_anamorph";
5994  case COrgMod::eSubtype_gb_synonym: return "gb_synonym";
5995  case COrgMod::eSubtype_old_lineage: return "old_lineage";
5996  case COrgMod::eSubtype_old_name: return "old_name";
5997  case COrgMod::eSubtype_culture_collection: return "culture_collection";
5998  case COrgMod::eSubtype_bio_material: return "bio_material";
5999  case COrgMod::eSubtype_metagenome_source: return "metagenome_source";
6000  case COrgMod::eSubtype_type_material: return "type_material";
6001  case COrgMod::eSubtype_other: return "note";
6002  default: return kEmptyStr;
6003  }
6004  return kEmptyStr;
6005 }
6006 
6007 
6008 static const string s_GetSubsourceString(const CSubSource::TSubtype& subtype)
6009 {
6010  switch ( subtype ) {
6011  case CSubSource::eSubtype_chromosome: return "chromosome";
6012  case CSubSource::eSubtype_map: return "map";
6013  case CSubSource::eSubtype_clone: return "clone";
6014  case CSubSource::eSubtype_subclone: return "subclone";
6015  case CSubSource::eSubtype_haplogroup: return "haplogroup";
6016  case CSubSource::eSubtype_haplotype: return "haplotype";
6017  case CSubSource::eSubtype_genotype: return "genotype";
6018  case CSubSource::eSubtype_sex: return "sex";
6019  case CSubSource::eSubtype_cell_line: return "cell_line";
6020  case CSubSource::eSubtype_cell_type: return "cell_type";
6021  case CSubSource::eSubtype_tissue_type: return "tissue_type";
6022  case CSubSource::eSubtype_clone_lib: return "clone_lib";
6023  case CSubSource::eSubtype_dev_stage: return "dev_stage";
6024  case CSubSource::eSubtype_frequency: return "frequency";
6025  case CSubSource::eSubtype_germline: return "germline";
6026  case CSubSource::eSubtype_rearranged: return "rearranged";
6027  case CSubSource::eSubtype_lab_host: return "lab_host";
6028  case CSubSource::eSubtype_pop_variant: return "pop_variant";
6029  case CSubSource::eSubtype_tissue_lib: return "tissue_lib";
6030  case CSubSource::eSubtype_plasmid_name: return "plasmid_name";
6031  case CSubSource::eSubtype_transposon_name: return "transposon_name";
6032  case CSubSource::eSubtype_insertion_seq_name: return "insertion_seq_name";
6033  case CSubSource::eSubtype_plastid_name: return "plastid_name";
6034  case CSubSource::eSubtype_country: return "country";
6035  case CSubSource::eSubtype_segment: return "segment";
6036  case CSubSource::eSubtype_endogenous_virus_name: return "endogenous_virus_name";
6037  case CSubSource::eSubtype_transgenic: return "transgenic";
6038  case CSubSource::eSubtype_environmental_sample: return "environmental_sample";
6039  case CSubSource::eSubtype_isolation_source: return "isolation_source";
6040  case CSubSource::eSubtype_other: return "note";
6041  default: return kEmptyStr;
6042  }
6043  return kEmptyStr;
6044 }
6045 
// Adds feature-table qualifiers describing a BioSource.
//  - "organism" from the Org's taxname, when set and non-empty.
//  - For each OrgMod with a subtype: the label from s_GetSubtypeString(),
//    with the subname appended when present; mods whose label is empty are
//    skipped.
//  - For each SubSource with a subtype: the label from s_GetSubsourceString(),
//    with the name appended when set; entries whose label is empty are skipped.
// NOTE(review): the declarator line and the statements that consume `str`
// after each append (listing lines 6068 and 6083) are missing from this listing.
6046 // ----------------------------------------------------------------------------
6048  const CBioSource& src )
6049 // ----------------------------------------------------------------------------
6050 {
6051  if ( src.IsSetOrg() ) {
6052  const CBioSource::TOrg& org = src.GetOrg();
6053 
6054  if ( org.IsSetTaxname() && !org.GetTaxname().empty() ) {
6055  x_AddFTableQual("organism", org.GetTaxname());
6056  }
6057 
6058  if ( org.IsSetOrgname() ) {
6059  ITERATE (COrgName::TMod, it, org.GetOrgname().GetMod()) {
6060  if ( (*it)->IsSetSubtype() ) {
6061  string str = s_GetSubtypeString((*it)->GetSubtype());
      // An empty label means the subtype has no mapping; emit nothing.
6062  if ( str.empty() ) {
6063  continue;
6064  }
6065  if ( (*it)->IsSetSubname() && !(*it)->GetSubname().empty() ) {
6066  str += (*it)->GetSubname();
6067  }
6069  }
6070  }
6071  }
6072  }
6073 
6074  ITERATE (CBioSource::TSubtype, it, src.GetSubtype()) {
6075  if ( (*it)->IsSetSubtype() ) {
6076  string str = s_GetSubsourceString((*it)->GetSubtype());
      // An empty label means the subtype has no mapping; emit nothing.
6077  if ( str.empty() ) {
6078  continue;
6079  }
6080  if ( (*it)->IsSetName() ) {
6081  str += (*it)->GetName();
6082  }
6084  }
6085  }
6086 }
6087 
6088 
6089 /////////////////////////////////////////////////////////////////////////////
6090 // Source Feature
6091 /////////////////////////////////////////////////////////////////////////////
6092 
// Constructs a source item from a mapped feature.
// The base class receives `loc` when it is non-null and the feature's own
// location otherwise.  m_WasDesc, m_IsFocus and m_IsSynthetic start out false,
// then x_GatherInfo(ctx) fills in the rest.
// NOTE(review): the declarator line and two parameter lines (`ctx` and `ftree`
// are used below) are missing from this listing.
6094 (const CMappedFeat& feat,
6097  const CSeq_loc* loc)
6098  : CFeatureItemBase(feat, ctx, ftree, loc ? loc : &feat.GetLocation()),
6099  m_WasDesc(false), m_IsFocus(false), m_IsSynthetic(false)
6100 {
6101  x_GatherInfo(ctx);
6102 }
6103 
6104 
// Identifies this item as a source feature: always returns eItem_SourceFeat.
// NOTE(review): the declarator line is missing from this listing.
6106 {
6107  return eItem_SourceFeat;
6108 }
6109 
// Collects per-source state and then builds the qualifiers.
// A source without an Org is dropped: m_Feat is reset to an empty CMappedFeat
// and x_SetSkip() is called.  Otherwise m_IsFocus mirrors IsSetIs_focus(), and
// m_IsSynthetic is set by the first of these tests that succeeds:
//   1. origin equals eOrigin_synthetic
//   2. the OrgName division equals "SYN" (case-insensitive)
//   3. the taxname equals "synthetic construct" (case-insensitive)
// NOTE(review): the declarator line is missing from this listing.
6111 {
6112  const CBioSource& bsrc = GetSource();
6113  if (!bsrc.IsSetOrg()) {
6114  m_Feat = CMappedFeat();
6115  x_SetSkip();
6116  return;
6117  }
6118 
6119  m_IsFocus = bsrc.IsSetIs_focus();
6120  if (bsrc.GetOrigin() == CBioSource::eOrigin_synthetic) {
6121  m_IsSynthetic = true;
6122  }
6123  if (!m_IsSynthetic && bsrc.GetOrg().IsSetOrgname()) {
6124  m_IsSynthetic = bsrc.GetOrg().GetOrgname().IsSetDiv() &&
6125  NStr::EqualNocase(bsrc.GetOrg().GetOrgname().GetDiv(), "SYN");
6126  }
      // IsSetOrg() is already known to be true here (see the early return above).
6127  if (!m_IsSynthetic && bsrc.IsSetOrg() && bsrc.GetOrg().IsSetTaxname()) {
6128  if (NStr::EqualNocase(bsrc.GetOrg().GetTaxname(), "synthetic construct")) {
6129  m_IsSynthetic = true;
6130  }
6131  }
6132  x_AddQuals(ctx);
6133 }
6134 
6135 
// Adds the qualifiers that come from the feature itself (molecule type,
// submitter seq-id, and comment / title / citation / dbxref when set) and then
// delegates to the BioSource overload of x_AddQuals.
// NOTE(review): the declarator line and the x_AddQual(...) call lines inside
// each branch are missing from this listing, so the slots used are not visible.
// NOTE(review): the assertion admits Org data, yet GetBiosrc() is called
// unconditionally below - confirm the Org case cannot reach this point.
6137 {
6138  const CSeqFeatData& data = m_Feat.GetData();
6139  _ASSERT(data.IsOrg() || data.IsBiosrc());
6140  // add various generic qualifiers...
6142  new CFlatMolTypeQVal(ctx.GetBiomol(), ctx.GetMol()));
6144  new CFlatSubmitterSeqidQVal(ctx.GetTech()));
6145  if (m_Feat.IsSetComment()) {
6147  }
6148  if (m_Feat.IsSetTitle()) {
6150  }
6151  if (m_Feat.IsSetCit()) {
6153  }
6154  if (m_Feat.IsSetDbxref()) {
6156  }
6157 
6158  // add qualifiers from biosource fields
6159  x_AddQuals(data.GetBiosrc(), ctx);
6160 }
6161 
6162 
// Returns the source-qualifier slot for an OrgMod by casting its subtype to
// COrgMod::ESubtype and passing it to GetSourceQualOfOrgMod().
// NOTE(review): the declarator line is missing from this listing.
6164 {
6165  return GetSourceQualOfOrgMod( static_cast<COrgMod::ESubtype>(om.GetSubtype()) );
6166 }
6167 
// Produces the display text for a structured voucher value
// (specimen_voucher-style "inst[:coll]:id" strings).
//
// Returns strRawName unchanged when HTML output is off, when the string does
// not parse as a structured voucher, or when the institution part is blank.
// Otherwise builds HTML: the institution code wrapped in an <acronym> element
// whose title is the institution's full name and, when voucher link
// information is available, the id wrapped in an <a href> element built from
// that information.  If neither link information nor a full institution name
// is available, strRawName is returned.
//
// NOTE(review): the declarator line, the lookup that defines
// `voucher_info_ref`, and both declarations of `text` are missing from this
// listing.
6170  const string& strRawName )
6171 {
6172  if ( ! ctx.Config().DoHTML() ) {
6173  return strRawName;
6174  }
6175 
6176  // doesn't COrgMod already have the code for this?
6177  string inst;
6178  string coll;
6179  string id;
6180  {
6181  if( ! COrgMod::ParseStructuredVoucher(strRawName, inst, coll, id) || NStr::IsBlank(inst)) {
6182  return strRawName;
6183  }
      // From here on `inst` is "institution:collection" when a collection exists.
6184  if( ! coll.empty() ) {
6185  inst += ':' + coll;
6186  }
6187  }
6188 
6190  if( voucher_info_ref ) {
6192 
      // Prefer the name COrgMod knows; fall back to the one in the voucher info.
6193  string inst_full_name = COrgMod::GetInstitutionFullName( inst );
6194  if (inst_full_name.empty()) {
6195  inst_full_name = voucher_info_ref->m_InstFullName;
6196  }
      // Only double quotes in the title are escaped; `inst` and `id` are
      // written as-is.  NOTE(review): confirm these cannot carry markup.
6197  text << "<acronym title=\""
6198  << NStr::Replace(inst_full_name, "\"", "&quot;")
6199  << "\" class=\"voucher\">"
6200  << inst << "</acronym>"
6201  << ":"
6202  << "<a href=\"" << *voucher_info_ref->m_Links;
6203 
      // The remainder of the URL is assembled piece by piece from the optional
      // fields of the voucher info.
6204  if( voucher_info_ref->m_PrependInstitute) {
6205  text << inst;
6206  }
6207  if( voucher_info_ref->m_PrependCollection) {
6208  text << coll;
6209  }
6210  if (voucher_info_ref->m_Prefix) {
6211  text << *voucher_info_ref->m_Prefix;
6212  }
6213  if (voucher_info_ref->m_Trim) {
6214  const string& trim = *voucher_info_ref->m_Trim;
6215  if (NStr::StartsWith(id, trim)) {
6216  NStr::TrimPrefixInPlace(id, trim);
6218  }
6219  }
      // Left-pad the id with copies of m_PadWith until the running length
      // reaches m_PadTo.
      // NOTE(review): if *m_PadWith is an empty string, len_pad is 0 and this
      // loop cannot terminate once entered - confirm the data rules that out.
6220  if (voucher_info_ref->m_PadTo > 0 && voucher_info_ref->m_PadWith) {
6221  int len_id = (int) id.length();
6222  int len_pad = (int) voucher_info_ref->m_PadWith->length();
6223  while (len_id < voucher_info_ref->m_PadTo) {
6224  text << *voucher_info_ref->m_PadWith;
6225  len_id += len_pad;
6226  }
6227  }
6228  text << id;
6229  if( voucher_info_ref->m_Suffix ) {
6230  text << *voucher_info_ref->m_Suffix;
6231  }
6232  text << "\">" << id << "</a>";
6233  return CNcbiOstrstreamToString(text);
6234  } else {
6235  // fall back on at least getting institution name
6236  const string &inst_full_name = COrgMod::GetInstitutionFullName( inst );
6237  if( ! inst_full_name.empty() ) {
6239 
6240  text << "<acronym title=\"" << NStr::Replace(inst_full_name, "\"", "&quot;") << "\" class=\"voucher\">"
6241  << inst << "</acronym>"
6242  << ":" << id;
6243 
6244  return CNcbiOstrstreamToString(text);
6245  } else {
6246  // if all else fails, return the string we were initially given
6247  return strRawName;
6248  }
6249  }
6250 }
6251 
6252 
// Adds source qualifiers derived from an Org-ref.
//  - organism: the taxname; when that is empty and the configuration requires
//    an organism qualifier, "unknown" is used instead.
//  - the common name is only picked up in that "unknown" case.
//  - each OrgMod is routed through s_OrgModToSlot():
//      * ecotype values are emitted once each (empty values never),
//      * eSQ_none mods are ignored,
//      * subtypes that hold an institution code are re-wrapped with
//        s_GetSpecimenVoucherText() output,
//      * type_material values that fail IsINSDCValidTypeMaterial() are moved
//        to eSQ_orgmod_note with a "type_material: " prefix,
//      * everything else is emitted under its own slot.
// NOTE(review): the declarator line and several statement lines (the common
// name, Org mod and Db branches, and the start of one CRef declaration) are
// missing from this listing.
6254 {
6255  CTempString taxname;
6256  CTempString common;
6257  if ( org.IsSetTaxname() ) {
6258  taxname = org.GetTaxname();
6259  }
6260  if ( taxname.empty() && ctx.Config().NeedOrganismQual() ) {
6261  taxname = "unknown";
6262  if ( org.IsSetCommon() ) {
6263  common = org.GetCommon();
6264  }
6265  }
6266  if ( !taxname.empty() ) {
6267  x_AddQual(eSQ_organism, new CFlatStringQVal(taxname));
6268  }
6269  if ( !common.empty() ) {
6271  }
6272  if ( org.IsSetOrgname() ) {
6273  set<CTempString> ecotypesSeen; // holds the ones we've seen so don't show them again
6274  ecotypesSeen.insert(kEmptyStr); // empty string is always considered seen so we hide it
6275  ITERATE (COrgName::TMod, it, org.GetOrgname().GetMod()) {
6276 
6277  const COrgMod& mod = **it;
6278  const string & sSubname = (
6279  mod.CanGetSubname() ? mod.GetSubname() : kEmptyStr );
6280 
6281  ESourceQualifier slot = s_OrgModToSlot(**it);
6282  switch( slot ) {
6283  case eSQ_ecotype:
6284  if( ecotypesSeen.find(sSubname) != ecotypesSeen.end() ) {
6285  break; // already seen
6286  }
6287  ecotypesSeen.insert( sSubname );
6288  x_AddQual(slot, new CFlatOrgModQVal(mod));
6289  break;
6290  case eSQ_none:
6291  break;
6292  default:
6293  {
6294  const COrgMod::TSubtype stype = mod.GetSubtype();
6295  if( COrgMod::HoldsInstitutionCode(stype) ) {
      // A copy of the mod is made so the original object is left untouched.
6296  CRef<COrgMod> new_mod( new COrgMod(stype,
6297  ( sSubname.empty() ? kEmptyStr : s_GetSpecimenVoucherText(ctx, sSubname) ) ));
6298  x_AddQual(slot, new CFlatOrgModQVal(*new_mod));
6299  } else if (stype == COrgMod::eSubtype_type_material && (! COrgMod::IsINSDCValidTypeMaterial(sSubname))) {
6301  ( sSubname.empty() ? kEmptyStr : "type_material: " + sSubname ) ));
6302  x_AddQual(eSQ_orgmod_note, new CFlatOrgModQVal(*new_mod));
6303  } else {
6304  x_AddQual(slot, new CFlatOrgModQVal(**it));
6305  }
6306  }
6307  break;
6308  }
6309  }
6310  }
6311  if (!WasDesc() && org.IsSetMod()) {
6313  }
6314  if ( org.IsSetDb() ) {
6316  }
6317 }
6318 
// Adds PCR primer qualifiers from the BioSource's pcr-primers field.
// For each reaction a single value is assembled from the non-empty
// "fwd_name: ", "fwd_seq: ", "rev_name: " and "rev_seq: " parts (joined by
// s_AddPcrPrimersQualsAppend).  A reaction that has both a forward and a
// reverse sequence is emitted as eSQ_PCR_primers; otherwise the value is
// prefixed with "PCR_primers=" and emitted as eSQ_pcr_primer_note.
// Reactions that contribute no text produce no qualifier.
// NOTE(review): the declarator line and the loop header over the reactions
// (which introduces `it`) are missing from this listing.
6320 {
6321  if( ! src.IsSetPcr_primers() ) {
6322  return;
6323  }
6324 
6325  const CBioSource_Base::TPcr_primers & primers = src.GetPcr_primers();
6326  if( primers.CanGet() ) {
6328  string primer_value;
6329 
6330  bool has_fwd_seq = false;
6331  bool has_rev_seq = false;
6332 
6333  if( (*it)->IsSetForward() ) {
6334  const CPCRReaction_Base::TForward &forward = (*it)->GetForward();
6335  if( forward.CanGet() ) {
6336  ITERATE( CPCRReaction_Base::TForward::Tdata, it2, forward.Get() ) {
6337  const string &fwd_name = ( (*it2)->CanGetName() ? (*it2)->GetName().Get() : kEmptyStr );
6338  if( ! fwd_name.empty() ) {
6339  s_AddPcrPrimersQualsAppend( primer_value, "fwd_name: ", fwd_name);
6340  }
6341  const string &fwd_seq = ( (*it2)->CanGetSeq() ? (*it2)->GetSeq().Get() : kEmptyStr );
6342  // NStr::ToLower( fwd_seq );
6343  if( ! fwd_seq.empty() ) {
6344  s_AddPcrPrimersQualsAppend( primer_value, "fwd_seq: ", fwd_seq);
6345  has_fwd_seq = true;
6346  }
6347  }
6348  }
6349  }
6350  if( (*it)->IsSetReverse() ) {
6351  const CPCRReaction_Base::TReverse &reverse = (*it)->GetReverse();
6352  if( reverse.CanGet() ) {
6353  ITERATE( CPCRReaction_Base::TReverse::Tdata, it2, reverse.Get() ) {
6354  const string &rev_name = ((*it2)->CanGetName() ? (*it2)->GetName().Get() : kEmptyStr );
6355  if( ! rev_name.empty() ) {
6356  s_AddPcrPrimersQualsAppend( primer_value, "rev_name: ", rev_name);
6357  }
6358  const string &rev_seq = ( (*it2)->CanGetSeq() ? (*it2)->GetSeq().Get() : kEmptyStr );
6359  // NStr::ToLower( rev_seq ); // do we need this?
6360  if( ! rev_seq.empty() ) {
6361  s_AddPcrPrimersQualsAppend( primer_value, "rev_seq: ", rev_seq);
6362  has_rev_seq = true;
6363  }
6364  }
6365  }
6366  }
6367 
6368  if( ! primer_value.empty() ) {
      // Without both sequences the value is demoted to a note.
6369  const bool is_in_note = ( ! has_fwd_seq || ! has_rev_seq );
6370  if( is_in_note ) {
6371  primer_value = "PCR_primers=" + primer_value;
6372  }
6373  const ESourceQualifier srcQual = ( is_in_note ? eSQ_pcr_primer_note : eSQ_PCR_primers );
6374  x_AddQual( srcQual, new CFlatStringQVal( primer_value ) );
6375  }
6376  }
6377  }
6378 }
6379 
// Returns the source-qualifier slot for a SubSource by casting its subtype to
// CSubSource::ESubtype and passing it to GetSourceQualOfSubSource().
// NOTE(review): the declarator line is missing from this listing.
6381 {
6382  return GetSourceQualOfSubSource( static_cast<CSubSource::ESubtype>(ss.GetSubtype()) );
6383 }
6384 
// Adds source qualifiers derived from a BioSource.
// Order of work: Org-ref qualifiers, one qualifier per SubSource (slot chosen
// by s_SubSourceToSlot, eSQ_none entries dropped, metagenomic emitted as the
// fixed string "metagenomic"), PCR primer qualifiers, then the organelle
// qualifier built from the genome value.
// The organelle qualifier is suppressed when the genome is insertion_seq,
// plasmid or transposon and a SubSource of the matching *_name kind was seen,
// so the element is printed once, with its name.
// NOTE(review): the declarator line, one statement after the Org block, the
// first `case` label of the switch, and the body of the final comment branch
// are missing from this listing.
6386 {
6387  // add qualifiers from Org_ref field
6388  if ( src.IsSetOrg() ) {
6389  x_AddQuals(src.GetOrg(), ctx);
6390  }
6392 
6393  bool insertion_seq_name = false,
6394  plasmid_name = false,
6395  transposon_name = false;
6396 
6397  ITERATE (CBioSource::TSubtype, it, src.GetSubtype()) {
6398  ESourceQualifier slot = s_SubSourceToSlot(**it);
6399 
6400  switch( slot ) {
6401 
6403  insertion_seq_name = true;
6404  x_AddQual(slot, new CFlatSubSourceQVal(**it));
6405  break;
6406 
6407  case eSQ_plasmid_name:
6408  plasmid_name = true;
6409  x_AddQual(slot, new CFlatSubSourceQVal(**it));
6410  break;
6411 
6412  case eSQ_transposon_name:
6413  transposon_name = true;
6414  x_AddQual(slot, new CFlatSubSourceQVal(**it));
6415  break;
6416 
6417  case eSQ_metagenomic:
6418  x_AddQual( eSQ_metagenomic, new CFlatStringQVal( "metagenomic") );
6419  break;
6420 
6421  default:
6422  if (slot != eSQ_none) {
6423  x_AddQual(slot, new CFlatSubSourceQVal(**it));
6424  }
6425  break;
6426  }
6427  }
6428 
6429  // Gets direct "pcr-primers" tag from file and adds the quals from that
6430  x_AddPcrPrimersQuals(src, ctx);
6431 
6432  // some qualifiers are flags in genome and names in subsource,
6433  // print once with name
6434  CBioSource::TGenome genome = src.GetGenome();
6435  CRef<CFlatOrganelleQVal> organelle(new CFlatOrganelleQVal(genome));
6436  if ( (insertion_seq_name && genome == CBioSource::eGenome_insertion_seq) ||
6437  (plasmid_name && genome == CBioSource::eGenome_plasmid) ||
6438  (transposon_name && genome == CBioSource::eGenome_transposon) ) {
6439  organelle.Reset();
6440  }
6441  if ( organelle ) {
6442  x_AddQual(eSQ_organelle, organelle);
6443  }
6444 
6445  if ( !WasDesc() && m_Feat.IsSetComment() ) {
6447  }
6448 }
6449 
// Formats the gathered source qualifiers into ff, in the fixed order given by
// the DO_QUAL sequence below.  Each DO_QUAL(x) formats slot eSQ_x under the
// name returned by GetStringOfSourceQual(eSQ_x).
// Configuration-dependent parts:
//  - frequency is emitted here only when FrequencyToNote() is off;
//  - the country slot is emitted under the name "geo_loc_name" when either
//    GeoLocNameCountry() or CSubSource::NCBI_UseGeoLocNameForCountry() says so;
//  - when SrcQualsToNote() is off, the note-class qualifiers are emitted as
//    regular qualifiers via x_FormatGBNoteQuals().
// Whatever remains is folded into one note by x_FormatNoteQuals().
// NOTE(review): the declarator line is missing from this listing.
6451 {
      // Reserve up front: at most one entry per gathered qualifier is expected.
6452  ff.SetQuals().reserve(m_Quals.Size());
6453  CFlatFeature::TQuals& qvec = ff.SetQuals();
6454 
6455 #define DO_QUAL(x) x_FormatQual(eSQ_##x, GetStringOfSourceQual(eSQ_##x), qvec)
6456  DO_QUAL(organism);
6457 
6458  DO_QUAL(organelle);
6459 
6460  DO_QUAL(mol_type);
6461 
6462  DO_QUAL(submitter_seqid);
6463 
6464  DO_QUAL(strain);
6465  DO_QUAL(substrain);
6466  DO_QUAL(variety);
6467  DO_QUAL(serotype);
6468  DO_QUAL(serovar);
6469  DO_QUAL(cultivar);
6470  DO_QUAL(isolate);
6471  DO_QUAL(isolation_source);
6472  DO_QUAL(spec_or_nat_host);
6473  DO_QUAL(sub_species);
6474 
6475  DO_QUAL(specimen_voucher);
6476  DO_QUAL(culture_collection);
6477  DO_QUAL(bio_material);
6478 
6479  DO_QUAL(type_material);
6480 
6481  DO_QUAL(db_xref);
6482  DO_QUAL(org_xref);
6483 
6484  DO_QUAL(chromosome);
6485 
6486  DO_QUAL(segment);
6487 
6488  DO_QUAL(map);
6489  DO_QUAL(clone);
6490  DO_QUAL(subclone);
6491  DO_QUAL(haplotype);
6492  DO_QUAL(haplogroup);
6493  DO_QUAL(sex);
6494  DO_QUAL(mating_type);
6495  DO_QUAL(cell_line);
6496  DO_QUAL(cell_type);
6497  DO_QUAL(tissue_type);
6498  DO_QUAL(clone_lib);
6499  DO_QUAL(dev_stage);
6500  DO_QUAL(ecotype);
6501 
6502  if( ! GetContext()->Config().FrequencyToNote() ) {
6503  DO_QUAL(frequency);
6504  }
6505 
6506  DO_QUAL(germline);
6507  DO_QUAL(rearranged);
6508  DO_QUAL(transgenic);
6509  DO_QUAL(environmental_sample);
6510 
6511  DO_QUAL(lab_host);
6512  DO_QUAL(pop_variant);
6513  DO_QUAL(tissue_lib);
6514 
6515  DO_QUAL(plasmid_name);
6516  DO_QUAL(mobile_element);
6517  DO_QUAL(transposon_name);
6518  DO_QUAL(insertion_seq_name);
6519 
6520  if ( GetContext()->Config().GeoLocNameCountry() || CSubSource::NCBI_UseGeoLocNameForCountry() ) {
6521  x_FormatQual(eSQ_country, "geo_loc_name", qvec);
6522  } else {
6523  DO_QUAL(country);
6524  }
6525 
6526  DO_QUAL(focus);
6527 
6528  DO_QUAL(lat_lon);
6529  DO_QUAL(altitude);
6530  DO_QUAL(collection_date);
6531  DO_QUAL(collected_by);
6532  DO_QUAL(identified_by);
6533  DO_QUAL(PCR_primers);
6534  DO_QUAL(metagenome_source);
6535 
6536  if ( !GetContext()->Config().SrcQualsToNote() ) {
6537  // some note qualifiers appear as regular quals in GBench or Dump mode
6538  x_FormatGBNoteQuals(ff);
6539  }
6540 
6541  DO_QUAL(sequenced_mol);
6542  DO_QUAL(label);
6543  DO_QUAL(usedin);
6544  // DO_QUAL(citation);
6545 #undef DO_QUAL
6546 
6547  // Format the rest of the note quals (ones that weren't formatted above)
6548  // as a single note qualifier
6549  x_FormatNoteQuals(ff);
6550 }
6551 
6552 
// Emits the note-class source qualifiers as regular qualifiers of ff.
// Only valid when SrcQualsToNote() is off (asserted below).  Each DO_QUAL(x)
// formats slot eSQ_x under the name returned by GetStringOfSourceQual(eSQ_x);
// frequency is included here only when FrequencyToNote() is on.
// NOTE(review): the declarator line is missing from this listing.
6554 {
6555  _ASSERT(!GetContext()->Config().SrcQualsToNote());
6556  CFlatFeature::TQuals& qvec = ff.SetQuals();
6557 
6558 #define DO_QUAL(x) x_FormatQual(eSQ_##x, GetStringOfSourceQual(eSQ_##x), qvec)
6559  DO_QUAL(metagenomic);
6560  DO_QUAL(linkage_group);
6561 
6562  DO_QUAL(type);
6563  DO_QUAL(subtype);
6564  DO_QUAL(serogroup);
6565  DO_QUAL(pathovar);
6566  DO_QUAL(chemovar);
6567  DO_QUAL(biovar);
6568  DO_QUAL(biotype);
6569  DO_QUAL(group);
6570  DO_QUAL(subgroup);
6571  DO_QUAL(common);
6572  DO_QUAL(acronym);
6573  DO_QUAL(dosage);
6574 
6575  DO_QUAL(authority);
6576  DO_QUAL(forma);
6577  DO_QUAL(forma_specialis);
6578  DO_QUAL(synonym);
6579  DO_QUAL(anamorph);
6580  DO_QUAL(teleomorph);
6581  DO_QUAL(breed);
6582  if( GetContext()->Config().FrequencyToNote() ) {
6583  DO_QUAL(frequency);
6584  }
6585 
6586 // DO_QUAL(metagenome_source),
6587 // DO_QUAL(collection_date);
6588 // DO_QUAL(collected_by);
6589 // DO_QUAL(identified_by);
6590 // DO_QUAL(pcr_primer);
6591  DO_QUAL(genotype);
6592  DO_QUAL(plastid_name);
6593 
6594  DO_QUAL(endogenous_virus_name);
6595 
6596  DO_QUAL(zero_orgmod);
6597  DO_QUAL(one_orgmod);
6598  DO_QUAL(zero_subsrc);
6599 #undef DO_QUAL
6600 }
6601 
6602 
6603 /*
6604 static bool s_IsExactAndNonExactMatchOnNoteQuals(CFlatFeature::TQuals& qvec, const string& str)
6605 {
6606  if (qvec.empty()) {
6607  return false;
6608  }
6609 
6610  int has_exact = 0;
6611  int non_exact = 0;
6612 
6613  CFlatFeature::TQuals::iterator it = qvec.begin();
6614  while (it != qvec.end()) {
6615  const string& val = (*it)->GetValue();
6616  if (NStr::Find(val, str) != NPOS) {
6617  if (NStr::Equal(val, str)) {
6618  has_exact++;
6619  } else {
6620  non_exact++;
6621  }
6622  }
6623  ++it;
6624  }
6625 
6626  if (has_exact == 1 && non_exact > 0) return true;
6627  return false;
6628 }
6629 */
6630 
6631 
6632 
// Collects every remaining note-class qualifier into a local vector and
// turns it into a single note on ff.
// DO_NOTE(x) formats slot eSQ_x using the literal text "x" as its name.
// Ordering depends on m_WasDesc: when true, the feature note / orgmod note /
// subsource note come first; when false, "unstructured" comes first and those
// three follow the PCR primer note.
// The block of note-class qualifiers and the three trailing "?" slots are
// included only when SrcQualsToNote() is on.
// When the genome is eGenome_extrachrom, the note text starts with
// "extrachromosomal" and a newline is passed as the suffix.
// NOTE(review): the declarator line is missing from this listing.
6634 {
6635  CFlatFeature::TQuals qvec;
6636  bool add_period = false;
6637 
6638 #define DO_NOTE(x) x_FormatNoteQual(eSQ_##x, #x, qvec)
6639  if (m_WasDesc) {
6640  x_FormatNoteQual(eSQ_seqfeat_note, "note", qvec);
6641  DO_NOTE(orgmod_note);
6642  DO_NOTE(subsource_note);
6643  } else {
6644  DO_NOTE(unstructured);
6645  }
6646 
6647  if ( GetContext()->Config().SrcQualsToNote() ) {
6648  DO_NOTE(metagenomic);
6649  DO_NOTE(linkage_group);
6650  DO_NOTE(type);
6651  DO_NOTE(subtype);
6652  DO_NOTE(serogroup);
6653  DO_NOTE(pathovar);
6654  DO_NOTE(chemovar);
6655  DO_NOTE(biovar);
6656  DO_NOTE(biotype);
6657  DO_NOTE(group);
6658  DO_NOTE(subgroup);
6659  DO_NOTE(common);
6660  DO_NOTE(acronym);
6661  DO_NOTE(dosage);
6662 
6663  DO_NOTE(authority);
6664  DO_NOTE(forma);
6665  DO_NOTE(forma_specialis);
6666  DO_NOTE(synonym);
6667  DO_NOTE(anamorph);
6668  DO_NOTE(teleomorph);
6669  DO_NOTE(breed);
6670  if( GetContext()->Config().FrequencyToNote() ) {
6671  DO_NOTE(frequency);
6672  }
6673 
6674  /*
6675  if (s_IsExactAndNonExactMatchOnNoteQuals(qvec, "metagenomic")) {
6676  x_FormatNoteQual(eSQ_metagenome_source, "metagenomic; derived from metagenome", qvec);
6677  } else {
6678  x_FormatNoteQual(eSQ_metagenome_source, "derived from metagenome", qvec);
6679  }
6680  */
6681 
6682  DO_NOTE(genotype);
6683  x_FormatNoteQual(eSQ_plastid_name, "plastid", qvec);
6684  x_FormatNoteQual(eSQ_endogenous_virus_name, "endogenous_virus", qvec);
6685  }
6686  DO_NOTE(pcr_primer_note);
6687 
6688  if (!m_WasDesc) {
6689  x_FormatNoteQual(eSQ_seqfeat_note, "note", qvec);
6690  DO_NOTE(orgmod_note);
6691  DO_NOTE(subsource_note);
6692  }
6693 
6694  x_FormatNoteQual(eSQ_common_name, "common", qvec);
6695 
6696  if ( GetContext()->Config().SrcQualsToNote() ) {
6697  x_FormatNoteQual(eSQ_zero_orgmod, "?", qvec);
6698  x_FormatNoteQual(eSQ_one_orgmod, "?", qvec);
6699  x_FormatNoteQual(eSQ_zero_subsrc, "?", qvec);
6700  }
6701 #undef DO_NOTE
6702 
6703  string notestr;
6704  string suffix;
6705 
6706  if ( GetSource().IsSetGenome() &&
6707  GetSource().GetGenome() == CBioSource::eGenome_extrachrom ) {
6708  static const string kEOL = "\n";
6709  notestr += "extrachromosomal";
6710  suffix = kEOL;
6711  }
6712 
      // Join the collected values into notestr, then attach the note to ff.
6713  s_QualVectorToNote(qvec, true, notestr, suffix, add_period);
6714  s_NoteFinalize(add_period, notestr, ff, eTilde_note);
6715 }
6716 
6717 
// Constructs a source item from a BioSource that is not backed by a real
// feature (m_WasDesc is initialised to true).
// A source without an Org is dropped: m_Feat is reset and x_SetSkip() is
// called.  Otherwise a stand-in CSeq_feat is created whose data is the
// BioSource and whose location is either the whole primary sequence (when
// `range` is whole) or the [from, to] interval of `range` on the primary id.
// The feature is placed in a fresh annotation, registered in a newly created
// scope, and the first feature found there becomes m_Feat; m_Loc then points
// at that feature's location.
// NOTE(review): the declarator line, two parameter lines (`ctx` and `ftree`
// are used below) and one statement before x_GatherInfo() are missing from
// this listing.
6719 (const CBioSource& src,
6720  TRange range,
6723  : CFeatureItemBase(CMappedFeat(), ctx, ftree),
6724  m_WasDesc(true), m_IsFocus(false), m_IsSynthetic(false)
6725 {
6726  if (!src.IsSetOrg()) {
6727  m_Feat = CMappedFeat();
6728  x_SetSkip();
6729  return;
6730  }
6731  x_SetObject(src);
6732 
6733  /// We build a fake BioSource feature - even for a source descriptor
6734  CRef<CSeq_feat> feat(new CSeq_feat);
      // SetBiosrc() takes a non-const reference, hence the const_cast.
      // NOTE(review): presumably the source is not modified through it - confirm.
6735  feat->SetData().SetBiosrc(const_cast<CBioSource&>(src));
6736  if ( range.IsWhole() ) {
6737  feat->SetLocation().SetWhole(*ctx.GetPrimaryId());
6738  } else {
6739  CSeq_interval& ival = feat->SetLocation().SetInt();
6740  ival.SetFrom(range.GetFrom());
6741  ival.SetTo(range.GetTo());
6742  ival.SetId(*ctx.GetPrimaryId());
6743  }
6744 
6745  CRef<CSeq_annot> an(new CSeq_annot);
6746  an->SetData().SetFtable().push_back(feat);
6747 
6748  CRef<CScope> local_scope(new CScope(*CObjectManager::GetInstance()));
6749  CSeq_annot_Handle sah = local_scope->AddSeq_annot(*an);
6750  m_Feat = *(CFeat_CI(sah));
6751  m_Loc = &m_Feat.GetLocation();
6753 
6754  x_GatherInfo(ctx);
6755 }
6756 
6757 
// Formats every gathered qualifier stored under `slot` into qvec, using
// `name` as the qualifier name.  Iteration starts at the lower bound of
// `slot` in m_Quals and stops at the first entry with a different key.
// NOTE(review): the declarator line and the final argument line of the
// Format() call are missing from this listing.
6759 (ESourceQualifier slot,
6760  const CTempString& name,
6761  CFlatFeature::TQuals& qvec,
6762  IFlatQVal::TFlags flags) const
6763 {
6764  TQCI it = m_Quals.LowerBound(slot);
6765  TQCI end = m_Quals.end();
6766  while (it != end && it->first == slot) {
6767  const IFlatQVal* qual = it->second;
6768  qual->Format(qvec, name, *GetContext(),
6770  ++it;
6771  }
6772 }
6773 
6774 
6776 {
6778 }
6779 
6780 
// Re-points m_Loc at the given location object.
// NOTE(review): the declarator line is missing from this listing.
6782 {
6783  m_Loc.Reset(&loc);
6784 }
6785 
6786 
// Looks up a GenBank qualifier on m_Feat by name.
// Scans the feature's qualifier list in order, ignoring entries that lack a
// name or a value.  On the first entry whose name equals `key`, copies its
// value into `value` and returns true.  Returns false, leaving `value`
// untouched, when no such entry exists.
// NOTE(review): the declarator line is missing from this listing.
// NOTE(review): gbQuals is declared by value, so the qualifier container is
// copied on every call; a const reference would avoid that - confirm nothing
// relies on the copy.
6787 // ----------------------------------------------------------------------------
6789  const string& key,
6790  string& value ) const
6791 // ----------------------------------------------------------------------------
6792 {
6793  CSeq_feat::TQual gbQuals = m_Feat.GetQual();
6794  for ( CSeq_feat::TQual::iterator it = gbQuals.begin();
6795  it != gbQuals.end(); ++it )
6796  {
6797  //
6798  // Idea:
6799  // If a gbqual specifying the inference exists then bail out and let
6800  // gbqual processing take care of this qualifier. If no such gbqual is
6801  // present then add a default inference qualifier.
6802  //
6803  if (!(*it)->IsSetQual() || !(*it)->IsSetVal()) {
6804  continue;
6805  }
6806  if ( (*it)->GetQual() == key ) {
6807  value = (*it)->GetVal();
6808  return true;
6809  }
6810  }
6811  return false;
6812 }
6813 
// Reports whether m_Feat's extension records "tRNAscan-SE" as its method.
// Returns true only when all of the following hold:
//   - the feature has an Ext user object with both type and data set, whose
//     type is the string "CombinedFeatureUserObjects";
//   - one of its fields is labelled "ModelEvidence" and holds a user object
//     whose type is also the string "ModelEvidence";
//   - that object has a field labelled "Method" whose string value is
//     "tRNAscan-SE".
// Any field that does not fit is skipped; false is returned when nothing
// matches.
// NOTE(review): the declarator line is missing from this listing.
6815 {
6816  // try to make this fast, since it could be checked by every feature.
6817 
6818  // try to do cheap checks first
6819 
6820  if( ! m_Feat.IsSetExt() ) {
6821  return false;
6822  }
6823  const CUser_object & ext = m_Feat.GetExt();
6824  if( ! ext.IsSetType() || ! ext.IsSetData() ) {
6825  return false;
6826  }
6827  const CUser_object_Base::TType & ext_type = ext.GetType();
6828  if( ! ext_type.IsStr() || ext_type.GetStr() != "CombinedFeatureUserObjects" ) {
6829  return false;
6830  }
6831  const CUser_object::TData & ext_data = ext.GetData();
6832  ITERATE( CUser_object::TData, field_iter, ext_data ) {
6833  const CUser_field & field = **field_iter;
6834  if( ! field.IsSetLabel() || ! field.IsSetData() ) {
6835  continue;
6836  }
6837  const CUser_field::TLabel & field_label = field.GetLabel();
6838  const CUser_field::TData & field_data = field.GetData();
6839  if( ! field_label.IsStr() || ! field_data.IsObject() ||
6840  field_label.GetStr() != "ModelEvidence" )
6841  {
6842  continue;
6843  }
6844  const CUser_object & evidence_object = field_data.GetObject();
6845  if( ! evidence_object.IsSetData() ||
6846  ! evidence_object.IsSetType() ||
6847  ! evidence_object.GetType().IsStr() ||
6848  evidence_object.GetType().GetStr() != "ModelEvidence" )
6849  {
6850  continue;
6851  }
6852  const CUser_object::TData & evidence_data = evidence_object.GetData();
6853  ITERATE( CUser_object::TData, evidence_iter, evidence_data ) {
6854  const CUser_field & evidence_field = **evidence_iter;
6855  if( ! evidence_field.IsSetLabel() ||
6856  ! evidence_field.GetLabel().IsStr() ||
6857  evidence_field.GetLabel().GetStr() != "Method" ||
6858  ! evidence_field.IsSetData() ||
6859  ! evidence_field.GetData().IsStr() ||
6860  evidence_field.GetData().GetStr() != "tRNAscan-SE" )
6861  {
6862  continue;
6863  }
6864  // we found proof of method tRNAscan-SE, so we return true
6865  return true;
6866  }
6867  }
6868 
6869  // didn't find any proof of method tRNAscan-SE
6870  return false;
6871 }
6872 
6875 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void remove_if(Container &c, Predicate *__pred)
Definition: chainer.hpp:69
bool operator()(const char ch)
Tracks the best score (lowest value).
Definition: ncbiutil.hpp:219
CScope & GetScope(void) const
Definition: context.hpp:102
const CFlatFileConfig & Config(void) const
Definition: context.hpp:689
vector< TRef > TReferences
Definition: context.hpp:77
bool IsDDBJ(void) const
Definition: context.hpp:183
bool IsEMBL(void) const
Definition: context.hpp:182
CRef< CFeatureIndex > GetFeatureForProduct(void)
Definition: indexer.cpp:2299
CRef< CFeatureIndex > GetFeatIndex(const CMappedFeat &mf)
Definition: indexer.cpp:3044
bool HasOperon(void)
Definition: indexer.cpp:2988
CRef< CFeatureIndex > GetBestProteinFeature(void)
Definition: indexer.cpp:2353
CBioseq_Handle –.
CCdregion –.
Definition: Cdregion.hpp:66
CConstRef –.
Definition: ncbiobj.hpp:1266
Definition: Dbtag.hpp:53
CConstRef< CSeq_id > m_Id
void x_GatherInfo(CBioseqContext &ctx) override
EItem GetItemType() const override
CFeatHeaderItem(CBioseqContext &ctx)
CFeat_CI –.
Definition: feat_ci.hpp:64
CRef< CFeatureIndex > GetBestGene(void)
Definition: indexer.cpp:3204
const CMappedFeat GetMappedFeat(void) const
Definition: indexer.hpp:897
CRef< feature::CFeatTree > m_Feat_Tree
CFeatureItemBase(const CMappedFeat &feat, CBioseqContext &ctx, CRef< feature::CFeatTree > ftree, const CSeq_loc *loc=0, bool suppressAccession=false)
CConstRef< CFlatFeature > Format(void) const
virtual string GetKey(void) const
const CMappedFeat & GetFeat(void) const
const CSeq_loc & GetLoc(void) const
bool IsSuppressAccession(void) const
virtual void x_FormatQuals(CFlatFeature &ff) const =0
CConstRef< CSeq_loc > m_Loc
CMappedFeat m_Feat
void x_FormatGOQualCombined(EFeatureQualifier slot, const CTempString &name, CFlatFeature::TQuals &qvec, TQualFlags flags=0) const
void x_FormatQuals(CFlatFeature &ff) const override
void x_AddQualTranslationExceptionIdx(const CCdregion &, CBioseqContext &, string &tr_ex)
void x_AddFTableDbxref(const CSeq_feat::TDbxref &dbxref)
void x_AddFTableBondQuals(const CSeqFeatData::TBond &bond)
void x_AddFTableRegionQuals(const CSeqFeatData::TRegion &region)
void x_AddFTableQuals(CBioseqContext &ctx)
void x_AddQual(EFeatureQualifier slot, const IFlatQVal *value)
EItem GetItemType() const override
void x_AddQualTranslation(CBioseq_Handle &, CBioseqContext &, bool)
bool x_GetPseudo(const CGene_ref *=0, const CSeq_feat *=0) const
void x_AddQualPseudo(CBioseqContext &, CSeqFeatData::E_Choice, CSeqFeatData::ESubtype, bool)
void x_AddQualsRegion(CBioseqContext &)
bool IsMappedFromProt(void) const
void x_AddQualPartial(CBioseqContext &)
void x_AddQualDb(const CGene_ref *)
void x_FormatQual(EFeatureQualifier slot, const char *name, CFlatFeature::TQuals &qvec, TQualFlags flags=0) const
void x_AddQualsExt(const CUser_field &field, const CSeq_feat::TExt &ext)
TQuals::iterator TQI
CFeatureItem(const CMappedFeat &feat, CBioseqContext &ctx, CRef< feature::CFeatTree > ftree, const CSeq_loc *loc, EMapped mapped=eMapped_not_mapped, bool suppressAccession=false, CConstRef< CFeatureItem > parentFeatureItem=CConstRef< CFeatureItem >())
void x_AddQualsSite(CBioseqContext &)
void x_AddProductIdQuals(CBioseq_Handle &prod, EFeatureQualifier slot)
const CFlatStringQVal * x_GetStringQual(EFeatureQualifier slot) const
void x_AddFTableQual(const string &name, const string &val=kEmptyStr, CFormatQual::ETrim trim=CFormatQual::eTrim_Normal)
void x_DropIllegalQuals(void) const
CConstRef< CGene_ref > m_GeneRef
void x_AddQualSeqfeatNote(CBioseqContext &)
void x_AddQuals(CBioseqContext &ctx, CConstRef< CFeatureItem > parentFeatureItem)
void x_AddQualsProt(CBioseqContext &, bool)
void x_AddFTablePsecStrQuals(const CSeqFeatData::TPsec_str &psec_str)
string GetKey(void) const override
void x_FormatNoteQuals(CFlatFeature &ff) const
void x_AddQualProtComment(const CBioseq_Handle &)
void x_AddQualsHet(CBioseqContext &ctx)
void x_AddQualCodonStartIdx(const CCdregion &, CBioseqContext &, const int inset)
void x_AddQualsBond(CBioseqContext &)
void x_AddRecombinationClassQual(const string &recombination_class, bool check_qual_syntax)
bool x_AddFTableGeneQuals(const CSeqFeatData::TGene &gene)
void x_AddQualProtActivity(const CProt_ref *)
void x_AddRptUnitQual(const string &rpt_unit)
void x_AddQualsIdx(CBioseqContext &ctx, CConstRef< CFeatureItem > parentFeatureItem)
void x_AddFTableNonStdQuals(const CSeqFeatData::TNon_std_residue &res)
void x_AddQualProteinConflict(const CCdregion &, CBioseqContext &)
void x_AddQualsCdregion(const CMappedFeat &cds, CBioseqContext &ctx, bool pseudo)
void x_AddQualsNonStd(CBioseqContext &ctx)
void x_AddQualProtEcNumber(CBioseqContext &, const CProt_ref *)
void x_AddQualsGene(const CBioseqContext &ctx, const CGene_ref *, CConstRef< CSeq_feat > &, bool from_overlap)
void x_RemoveQuals(EFeatureQualifier slot) const
void x_GatherInfoWithParent(CBioseqContext &ctx, CConstRef< CFeatureItem > parentFeatureItem)
virtual void x_AddQualsRna(const CMappedFeat &feat, CBioseqContext &ctx, bool pseudo)
TQualVec m_FTableQuals
TQuals::const_iterator TQCI
void x_AddQualCdsProduct(CBioseqContext &, const CProt_ref *)
void x_AddQualsPsecStr(CBioseqContext &)
void x_AddQualsCdregionIdx(const CMappedFeat &cds, CBioseqContext &ctx, bool pseudo)
IFlatQVal::TFlags TQualFlags
void x_FormatNoteQual(EFeatureQualifier slot, const CTempString &name, CFlatFeature::TQuals &qvec, TQualFlags flags=0) const
void x_AddQualsGb(CBioseqContext &)
CFlatStringListQVal * x_GetStringListQual(EFeatureQualifier slot) const
void x_CleanQuals(const CGene_ref *)
void x_AddQualTranslationTable(const CCdregion &, CBioseqContext &)
CGene_ref::TSyn TGeneSyn
bool x_HasMethodtRNAscanSE(void) const
CFlatProductNamesQVal * x_GetFlatProductNamesQual(EFeatureQualifier slot) const
void x_AddQualTranslationException(const CCdregion &, CBioseqContext &)
void x_AddFTableAnticodon(const CTrna_ext &trna_ext, CBioseqContext &ctx)
void x_AddQualExpInv(CBioseqContext &)
void x_AddQualProtNote(const CProt_ref *, const CMappedFeat &)
void x_AddQualNote(CConstRef< CSeq_feat >)
void x_AddFTableBiosrcQuals(const CBioSource &src)
void x_AddQualProteinId(CBioseqContext &, const CBioseq_Handle &, CConstRef< CSeq_id >)
void x_AddGoQuals(const CUser_field &field)
void x_AddFTableProtQuals(const CMappedFeat &prot)
CSeqFeatData::ESubtype m_Type
void x_AddQualGeneXref(const CGene_ref *, const CConstRef< CSeq_feat > &)
void x_AddQualOldLocusTag(const CBioseqContext &ctx, CConstRef< CSeq_feat >)
bool x_GetGbValue(const string &, string &) const
void x_AddQualProtMethod(const CBioseq_Handle &)
void x_ImportQuals(CBioseqContext &ctx)
void x_AddFTableSiteQuals(const CSeqFeatData::TSite &site)
bool IsMappedFromCDNA(void) const
static string x_SeqIdWriteForTable(const CBioseq &seq, bool suppress_local, bool giOK)
void x_AddQualsRegulatoryClass(CBioseqContext &ctx, CSeqFeatData::ESubtype subtype)
void x_AddQualProtDesc(const CProt_ref *)
void x_AddQualCodedBy(CBioseqContext &)
void x_AddQualOperon(CBioseqContext &, CSeqFeatData::ESubtype)
void x_AddFTableCdregionQuals(const CMappedFeat &feat, CBioseqContext &ctx)
void x_AddFTableRnaQuals(const CMappedFeat &feat, CBioseqContext &ctx)
void x_GetAssociatedProtInfoIdx(CBioseqContext &, CBioseq_Handle &, const CProt_ref *&, CMappedFeat &protFeat, CConstRef< CSeq_id > &)
void x_AddQualsVariation(CBioseqContext &ctx)
void x_AddFTableExtQuals(const CSeq_feat::TExt &ext)
void x_AddRptTypeQual(const string &rpt_type, bool check_qual_syntax)
void x_AddQualDbXref(CBioseqContext &)
TQCI x_GetQual(EFeatureQualifier slot) const
bool x_IsSeqFeatDataFeatureLegal(CSeqFeatData::EQualifier qual)
void x_AddRegulatoryClassQual(const string &regulatory_class, bool check_qual_syntax)
void x_AddQualExt()
bool IsMapped(void) const
void x_AddQualCitation()
bool x_HasQual(EFeatureQualifier slot) const
void x_AddQualExceptions(CBioseqContext &)
void x_GetAssociatedProtInfo(CBioseqContext &, CBioseq_Handle &, const CProt_ref *&, CMappedFeat &protFeat, CConstRef< CSeq_id > &)
void x_AddQualCodonStart(const CCdregion &, CBioseqContext &)
TQuals & SetQuals(void)
vector< CRef< CFormatQual > > TQuals
bool IsFormatDDBJ(void) const
bool GoQualsEachMerge(void) const
bool IsModeGBench(void) const
bool IsFormatGBSeq(void) const
bool GeneRNACDSFeatures(void) const
bool HideRemoteImpFeatures(void) const
bool HideGI(void) const
bool ShowFarTranslations(void) const
bool IupacaaOnly(void) const
bool HideImpFeatures(void) const
bool IsPolicyFtp(void) const
bool AlwaysTranslateCDS(void) const
bool IsFormatINSDSeq(void) const
bool IsPolicyGenomes(void) const
bool IsFormatFTable(void) const
bool NeverTranslateCDS(void) const
bool TranslateIfNoProduct(void) const
bool DropIllegalQuals(void) const
bool CodonRecognizedToNote(void) const
bool HideProteinID(void) const
bool HideMiscFeatures(void) const
bool NeedRequiredQuals(void) const
bool IsModeDump(void) const
bool GoQualsToNote(void) const
bool ValidateFeatures(void) const
bool HideExonFeatures(void) const
bool ShowTranscript(void) const
bool HideIntronFeatures(void) const
int GetPubmedId(void) const
bool Equals(const CFlatGoQVal &rhs) const
void Format(TFlatQuals &quals, const CTempString &name, CBioseqContext &ctx, TFlags flags) const
const string & GetTextString(void) const
void x_SetExternal(void)
Definition: item_base.hpp:176
CBioseqContext * GetContext(void)
Definition: item_base.hpp:113
void x_SetObject(const CSerialObject &obj)
Definition: item_base.hpp:160
void x_SetSkip(void)
Definition: item_base.hpp:167
CProt_ref::TName & SetValue(void)
Definition: qualifiers.hpp:681
const CProt_ref::TName & GetValue(void) const
Definition: qualifiers.hpp:680
const string & GetString(void) const
Definition: flat_seqloc.hpp:88
const string & GetValue(void) const
Definition: qualifiers.hpp:281
@ fFlags_showEvenIfRedund
Definition: qualifiers.hpp:81
@ eTrim_WhitespaceOnly
Definition: qualifiers.hpp:93
@Gb_qual.hpp User-defined methods of the data storage class.
Definition: Gb_qual.hpp:61
static bool IsValidRptTypeValue(const string &val)
Definition: Gb_qual.cpp:258
static CSeq_feat_Handle ResolveGeneXref(const CGene_ref *xref_g_ref, const CSeq_entry_Handle &top_level_seq_entry)
This does plain, simple resolution of a CGene_ref to its gene.
static void GetAssociatedGeneInfo(const CSeq_feat_Handle &in_feat, CBioseqContext &ctx, const CConstRef< CSeq_loc > &feat_loc, CConstRef< CGene_ref > &out_suppression_check_gene_ref, const CGene_ref *&out_g_ref, CConstRef< CSeq_feat > &out_s_feat, const CSeq_feat_Handle &in_parent_feat)
Find the gene associated with the given feature.
void GetLabel(string *label) const
Definition: Gene_ref.cpp:57
bool IsSuppressed(void) const
Definition: Gene_ref.cpp:75
int GetId(void) const
CHeterogen –.
Definition: Heterogen.hpp:66
@Imp_feat.hpp User-defined methods of the data storage class.
Definition: Imp_feat.hpp:54
bool operator()(const string &arg)
const string & m_ComparisonString
CInStringPred(const string &comparisonString)
static TVoucherInfoRef GetInstitutionVoucherInfo(const string &inst_abbrev)
CMappedFeat –.
Definition: mapped_feat.hpp:59
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
Definition: ncbistre.hpp:802
ostream & AsString(ostream &s) const
Definition: Object_id.cpp:202
Exceptions for objmgr/util library.
@OrgMod.hpp User-defined methods of the data storage class.
Definition: OrgMod.hpp:54
static bool IsINSDCValidTypeMaterial(const string &type_material)
Definition: OrgMod.cpp:1224
static const string & GetInstitutionFullName(const string &short_name)
Definition: OrgMod.cpp:712
static bool HoldsInstitutionCode(const TSubtype stype)
This indicates if the given Org-mod subtype is supposed to hold an institution code (Example: "ATCC:2...
Definition: OrgMod.cpp:176
static bool ParseStructuredVoucher(const string &str, string &inst, string &coll, string &id)
Definition: OrgMod.cpp:189
CPCRPrimerSet –.
CPCRReactionSet –.
iterator begin(void)
Definition: qualifiers.hpp:192
iterator Erase(iterator it)
Definition: qualifiers.hpp:213
size_type Size() const
Definition: qualifiers.hpp:230
iterator LowerBound(Key &key)
Definition: qualifiers.hpp:206
iterator Find(const Key &key)
Definition: qualifiers.hpp:224
iterator end(void)
Definition: qualifiers.hpp:194
@RNA_ref.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
CRef –.
Definition: ncbiobj.hpp:618
CScope –.
Definition: scope.hpp:92
static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)
CRef< CSeqMasterIndex > GetMasterIndex(void) const
Definition: indexer.hpp:189
CRef< CBioseqIndex > GetBioseqIndex(void)
Definition: indexer.cpp:114
EQualifier
List of available qualifiers for feature keys.
@ eQual_recombination_class
@ eQual_UniProtKB_evidence
@ eQual_ribosomal_slippage
@ eQual_mobile_element_type
@ eQual_artificial_location
ESubtype GetSubtype(void) const
static const vector< string > & GetRecombinationClassList()
@ eSubtype_misc_difference
@ eSubtype_bad
These no longer need to match the FEATDEF values in the C toolkit's objfdef.h.
@ eSubtype_prim_transcript
@ eSubtype_transit_peptide_aa
static const vector< string > & GetRegulatoryClassList()
CRef< feature::CFeatTree > GetFeatTree(void) const
Definition: indexer.hpp:254
@ e_Ncbieaa
Definition: sequtil.hpp:57
@ e_Iupacaa
Definition: sequtil.hpp:55
CSeqVector –.
Definition: seq_vector.hpp:65
CSeq_annot_Handle –.
CSeq_entry_Handle –.
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
Definition: Seq_feat.cpp:429
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)
void SetLoc(const CSeq_loc &loc)
const CBioSource & GetSource(void) const
void x_FormatNoteQuals(CFlatFeature &ff) const
TQuals::const_iterator TQCI
void x_AddQuals(CBioseqContext &ctx) override
EItem GetItemType() const override
void x_AddQual(ESourceQualifier slot, const IFlatQVal *value) const
bool WasDesc(void) const
CSourceFeatureItem(const CBioSource &src, TRange range, CBioseqContext &ctx, CRef< feature::CFeatTree > ftree)
void x_GatherInfo(CBioseqContext &ctx) override
void x_AddPcrPrimersQuals(const CBioSource &src, CBioseqContext &ctx) const
void x_FormatQual(ESourceQualifier slot, const CTempString &name, CFlatFeature::TQuals &qvec, TQualFlags flags=0) const
void x_FormatQuals(CFlatFeature &ff) const override
void x_FormatGBNoteQuals(CFlatFeature &ff) const
void x_FormatNoteQual(ESourceQualifier slot, const char *name, CFlatFeature::TQuals &qvec, TQualFlags flags=0) const
void Subtract(const CSourceFeatureItem &other, CScope &scope)
class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...
Definition: static_map.hpp:105
TBase::const_iterator const_iterator
Definition: static_map.hpp:109
static bool NCBI_UseGeoLocNameForCountry(void)
Definition: SubSource.cpp:92
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Definition: User_object.cpp:84
@ eItem_FeatHeader
Definition: item.hpp:75
@ eItem_SourceFeat
Definition: item.hpp:76
@ eItem_Feature
Definition: item.hpp:77
virtual void Format(TFlatQuals &quals, const CTempString &name, CBioseqContext &ctx, TFlags flags=0) const =0
Definition: map.hpp:338
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
string Tag(const string &name, int value)
Include a standard set of the NCBI C++ Toolkit most basic headers.
static uch flags
static bool s_CheckQuals_bind(const CMappedFeat &feat)
static const string s_TrnaList[]
bool s_ValidateMobileElementType(const string &mobile_element_type_value)
CSeq_id_Handle s_FindBestIdChoice(const CBioseq_Handle::TId &ids)
static bool s_CheckQuals_conflict(const CMappedFeat &feat, CBioseqContext &ctx)
static const TMobileElemTypeKey mobile_element_key_to_suffix_required[]
static bool s_CheckQuals_gap(const CMappedFeat &feat)
static int s_GetOverlap(const CMappedFeat &feat)
static bool s_SkipFeature(const CMappedFeat &feat, const CSeq_loc &loc, CBioseqContext &ctx)
static const string & s_AaName(int aa)
static bool s_CheckQuals_gene(const CMappedFeat &feat)
static bool s_LocIsFuzz(const CMappedFeat &feat, const CSeq_loc &loc)
static bool s_StrEqualDisregardFinalPeriod(const string &s1, const string &s2, NStr::ECase use_case)
static bool s_HasPub(const CMappedFeat &feat, CBioseqContext &ctx)
static bool s_CheckQuals_old_seq(const CMappedFeat &feat, CBioseqContext &ctx)
static void s_ParseParentQual(const CGb_qual &gbqual, list< string > &vals)
static bool s_CheckFuzz(const CInt_fuzz &fuzz)
static ESourceQualifier s_OrgModToSlot(const COrgMod &om)
static bool s_CheckQuals_regulatory(const CMappedFeat &feat)
static const TQualPair sc_GbToFeatQualMap[]
static string s_GetSpecimenVoucherText(CBioseqContext &ctx, const string &strRawName)
static ESourceQualifier s_SubSourceToSlot(const CSubSource &ss)
static bool s_IsValidExceptionText(const string &text)
static void s_AddPcrPrimersQualsAppend(string &output, const string &name, const string &str)
static bool s_CheckQuals_ncRNA(const CMappedFeat &feat)
CStaticPairArrayMap< EFeatureQualifier, CSeqFeatData::EQualifier > TQualMap
static bool s_IsLegalECNumber(const string &ec_number)
static const string & s_GetSiteName(CSeqFeatData::TSite site)
static const char *const sc_ValidPseudoGene[]
CStaticPairArrayMap< const char *, bool, PCase_CStr > TMobileElemTypeMap
static const string s_GetSubtypeString(const COrgMod::TSubtype &subtype)
static int s_ScoreSeqIdHandle(const CSeq_id_Handle &idh)
CStaticArraySet< const char *, PNocase > TLegalRefSeqExceptText
static bool s_IsValidRegulatoryClass(const string &type)
static void s_NoteFinalize(bool addPeriod, string &noteStr, CFlatFeature &flatFeature, ETildeStyle style=eTilde_newline)
#define DO_NOTE_PREPEND_NEWLINE(x)
SStaticPair< EFeatureQualifier, CSeqFeatData::EQualifier > TQualPair
#define DO_IMPORT(x)
static bool s_IsValidPseudoGene(objects::CFlatFileConfig::TMode mode, const string &text)
static int s_ToIupacaa(int aa)
static bool s_CheckQuals_mod_base(const CMappedFeat &feat)
static bool s_IsValidRecombinationClass(const string &type)
static bool s_CheckMandatoryQuals(const CMappedFeat &feat, const CSeq_loc &loc, CBioseqContext &ctx)
static bool s_IsValidDirection(const string &direction)
static void s_QualVectorToNote(const CFlatFeature::TQuals &qualVector, bool noRedundancy, string &note, string &punctuation, bool &addPeriod)
static bool s_CheckQuals_assembly_gap(const CMappedFeat &feat)
static bool s_IsValidnConsSplice(const string &cons_splice)
static const char *const sc_ValidExceptionText[]
static CMappedFeat s_GetBestProtFeature(const CBioseq_Handle &seq)
The best protein feature is defined as the one that has the most overlap with the given DNA.
#define DO_NOTE(x)
#define DO_QUAL(x)
static void s_HTMLizeExperimentQual(string &out_new_val, const string &val)
static CSeqFeatData::EQualifier s_GbToSeqFeatQual(EFeatureQualifier qual)
static bool s_IsValidRefSeqExceptionText(const string &text)
static bool s_TransSplicingFeatureAllowed(const CSeqFeatData &data)
CStaticArraySet< const char *, PNocase > TLegalPseudoGeneText
USING_SCOPE(sequence)
SStaticPair< const char *, bool > TMobileElemTypeKey
static bool s_HasCompareOrCitation(const CMappedFeat &feat, CBioseqContext &ctx)
static void s_SplitCommaSeparatedStringInParens(vector< string > &output_vec, const string &string_to_split)
static const char *const sc_ValidRefSeqExceptionText[]
static bool s_ValidId(const CSeq_id &id)
static const string & s_GetBondName(CSeqFeatData::TBond bond)
static bool s_CheckQuals_cdregion(const CMappedFeat &feat, const CSeq_loc &loc, CBioseqContext &ctx)
DEFINE_STATIC_ARRAY_MAP(TLegalPseudoGeneText, sc_ValidPseudoGeneText, sc_ValidPseudoGene)
CStaticArraySet< const char *, PNocase_CStr > TLegalExceptText
static const string s_GetSubsourceString(const CSubSource::TSubtype &subtype)
ESourceQualifier GetSourceQualOfOrgMod(COrgMod::ESubtype eOrgModSubtype)
Translate an org-mod subtype into a sourcequalifier.
ESourceQualifier
@ eSQ_focus
@ eSQ_none
@ eSQ_one_orgmod
@ eSQ_transposon_name
@ eSQ_org_xref
@ eSQ_common_name
@ eSQ_plasmid_name
@ eSQ_orgmod_note
@ eSQ_zero_orgmod
@ eSQ_country
@ eSQ_metagenomic
@ eSQ_zero_subsrc
@ eSQ_plastid_name
@ eSQ_insertion_seq_name
@ eSQ_organelle
@ eSQ_submitter_seqid
@ eSQ_citation
@ eSQ_PCR_primers
@ eSQ_mol_type
@ eSQ_organism
@ eSQ_label
@ eSQ_seqfeat_note
@ eSQ_db_xref
@ eSQ_endogenous_virus_name
@ eSQ_pcr_primer_note
@ eSQ_ecotype
@ eSQ_unstructured
ESourceQualifier GetSourceQualOfSubSource(CSubSource::ESubtype eSubSourceSubtype)
Translate a subsource subtype into a sourcequalifier.
EFeatureQualifier
@ eFQ_cons_splice
@ eFQ_label
@ eFQ_rpt_type
@ eFQ_codon
@ eFQ_PCR_conditions
@ eFQ_rpt_unit
@ eFQ_mobile_element_type
@ eFQ_cds_product
@ eFQ_prot_missing
@ eFQ_trans_splicing
@ eFQ_prot_conflict
@ eFQ_rpt_unit_seq
@ eFQ_gene_syn_refseq
@ eFQ_gene
@ eFQ_region_name
@ eFQ_db_xref
@ eFQ_product
@ eFQ_gen_map
@ eFQ_number
@ eFQ_operon
@ eFQ_bond
@ eFQ_gap_type
@ eFQ_phenotype
@ eFQ_pseudo
@ eFQ_prot_activity
@ eFQ_calculated_mol_wt
@ eFQ_translation
@ eFQ_function
@ eFQ_prot_comment
@ eFQ_exception_note
@ eFQ_circular_RNA
@ eFQ_prot_desc
@ eFQ_non_std_residue
@ eFQ_gene_syn
@ eFQ_prot_names
@ eFQ_gene_xref
@ eFQ_site_type
@ eFQ_figure
@ eFQ_UniProtKB_evidence
@ eFQ_tag_peptide
@ eFQ_region
@ eFQ_mobile_element
@ eFQ_transcript_id_note
@ eFQ_mod_base
@ eFQ_regulatory_class
@ eFQ_site
@ eFQ_coded_by
@ eFQ_go_function
@ eFQ_mol_wt
@ eFQ_rpt_family
@ eFQ_partial
@ eFQ_derived_from
@ eFQ_compare
@ eFQ_direction
@ eFQ_anticodon
@ eFQ_rad_map
@ eFQ_rrna_its
@ eFQ_trna_aa
@ eFQ_seqfeat_note
@ eFQ_gene_allele
@ eFQ_xtra_prod_quals
@ eFQ_prot_name
@ eFQ_trna_codons
@ eFQ_protein_id
@ eFQ_transcription
@ eFQ_cyt_map
@ eFQ_pseudogene
@ eFQ_prot_note
@ eFQ_clone
@ eFQ_ribosomal_slippage
@ eFQ_EC_number
@ eFQ_recombination_class
@ eFQ_maploc
@ eFQ_citation
@ eFQ_product_quals
@ eFQ_artificial_location
@ eFQ_linkage_evidence
@ eFQ_old_locus_tag
@ eFQ_bond_type
@ eFQ_replace
@ eFQ_gene_desc
@ eFQ_inference
@ eFQ_frequency
@ eFQ_go_component
@ eFQ_transcript_id
@ eFQ_standard_name
@ eFQ_bound_moiety
@ eFQ_exception
@ eFQ_estimated_length
@ eFQ_locus_tag
@ eFQ_encodes
@ eFQ_heterogen
@ eFQ_nomenclature
@ eFQ_prot_EC_number
@ eFQ_transl_except
@ eFQ_map
@ eFQ_evidence
@ eFQ_usedin
@ eFQ_organism
@ eFQ_ncRNA_class
@ eFQ_experiment
@ eFQ_go_process
@ eFQ_peptide
@ eFQ_prot_method
@ eFQ_rpt_unit_range
@ eFQ_gene_note
@ eFQ_codon_start
@ eFQ_illegal_qual
@ eFQ_sec_str_type
@ eFQ_modelev
@ eFQ_transl_table
@ eFQ_allele
@ eFQ_gene_map
@ eFQ_satellite
@ eFQ_none
CS_CONTEXT * ctx
Definition: t0006.c:12
static const struct name_t names[]
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
static void DLIST_NAME() remove(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:90
static SQLCHAR output[256]
Definition: print.c:5
static const char * str(char *buf, int n)
Definition: stats.c:84
static char tmp[3200]
Definition: utf8.c:42
static FILE * f
Definition: readconf.c:23
char data[12]
Definition: iconv.c:80
Public API for finding the gene(s) on a given feature using the same criteria as the flatfile generat...
constexpr size_t ArraySize(const Element(&)[Size])
Definition: ncbimisc.hpp:1532
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define ZERO_GI
Definition: ncbimisc.hpp:1088
string
Definition: cgiapp.hpp:690
const TPrim & Get(void) const
Definition: serialbase.hpp:347
#define ENUM_METHOD_NAME(EnumName)
Definition: serialbase.hpp:994
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2145
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
static int Score(const CRef< CSeq_id > &id)
Wrappers for use with FindBestChoice from <corelib/ncbiutil.hpp>
Definition: Seq_id.hpp:772
string GetLabel(const CSeq_id &id)
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
Definition: Seq_id.cpp:169
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:605
@ eBoth
Type and content, delimited by a vertical bar.
Definition: Seq_id.hpp:606
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
CRef< CSeq_loc > Subtract(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper, ILengthGetter *len_getter) const
Subtract seq-loc from this, merge/sort resulting ranges depending on flags.
Definition: Seq_loc.cpp:5087
bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const
Check if strand is set for any/all part(s) of the seq-loc depending on the flag.
Definition: Seq_loc.cpp:858
const CSeq_id * GetId(void) const
Get the id of the location; return NULL if it has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
@ eEmpty_Allow
ignore empty locations
Definition: Seq_loc.hpp:458
@ fSortAndMerge_All
Definition: Seq_loc.hpp:334
@ fStrand_Ignore
Definition: Seq_loc.hpp:325
@ fFGL_Content
Include its content if there is any.
Definition: feature.hpp:73
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
TSeqPos LocationOffset(const CSeq_loc &outer, const CSeq_loc &inner, EOffsetType how=eOffset_FromStart, CScope *scope=0)
returns (TSeqPos)-1 if the locations don't overlap
int SeqLocPartialCheck(const CSeq_loc &loc, CScope *scope)
CRef< CSeq_loc > Seq_loc_Subtract(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Subtract the second seq-loc from the first one.
@ eSeqlocPartial_Complete
@ eOffset_FromStart
For positive-orientation strands, start = left and end = right; for reverse-orientation strands,...
CConstRef< CSeq_feat > GetOverlappingGene(const CSeq_loc &loc, CScope &scope, ETransSplicing eTransSplicing=eTransSplicing_Auto)
Definition: sequence.cpp:1366
CConstRef< CSeq_feat > GetOverlappingOperon(const CSeq_loc &loc, CScope &scope)
Definition: sequence.cpp:1600
string GetAccessionForGi(TGi gi, CScope &scope, EAccessionVersion use_version=eWithAccessionVersion, EGetIdType flags=0)
Retrieve the accession for a given GI.
Definition: sequence.cpp:686
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
Definition: sequence.cpp:4095
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
Definition: sequence.hpp:101
double GetProteinWeight(const CBioseq_Handle &handle, const CSeq_loc *location=0, TGetProteinWeight opts=0)
Handles the standard 20 amino acids and Sec and Pyl; treats Asx as Asp, Glx as Glu,...
Definition: weight.cpp:212
int TGetProteinWeight
Definition: weight.hpp:58
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id, const CTSE_Handle &tse)
Get bioseq handle for sequence within one TSE.
Definition: scope.cpp:253
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CSeq_annot_Handle AddSeq_annot(CSeq_annot &annot, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add Seq-annot, return its CSeq_annot_Handle.
Definition: scope.cpp:538
EGetBioseqFlag
Definition: scope.hpp:125
@ eGetBioseq_Loaded
Search in all loaded TSEs in the scope.
Definition: scope.hpp:127
@ eGetBioseq_All
Search bioseq, load if not loaded yet.
Definition: scope.hpp:128
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
bool IsSetExcept(void) const
vector< CSeq_id_Handle > TId
bool GetExcept(void) const
bool IsSetComment(void) const
const CPub_set & GetCit(void) const
const CTSE_Handle & GetTSE_Handle(void) const
Get CTSE_Handle of containing TSE.
bool GetPseudo(void) const
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
const CSeq_annot_Handle & GetAnnot(void) const
Get handle to seq-annot for this feature.
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
const CSeq_feat::TExts & GetExts(void) const
bool IsSetDbxref(void) const
bool IsSetExp_ev(void) const
CSeq_feat::EExp_ev GetExp_ev(void) const
const CSeqFeatData & GetData(void) const
bool IsSetTitle(void) const
bool IsSetXref(void) const
const CProt_ref * GetProtXref(void) const
get protein (if present) from Seq-feat.xref list
bool IsSetExcept_text(void) const
bool IsSetProduct(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
const string & GetComment(void) const
const CUser_object & GetExt(void) const
const CSeq_annot::TDesc & Seq_annot_GetDesc(void) const
const string & GetExcept_text(void) const
const string & GetTitle(void) const
bool IsSetExts(void) const
CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const
const CSeq_feat::TDbxref & GetDbxref(void) const
bool IsSetQual(void) const
const CGene_ref * GetGeneXref(void) const
get gene (if present) from Seq-feat.xref list
bool IsSetPseudo(void) const
const CSeq_feat::TQual & GetQual(void) const
const TId & GetId(void) const
bool IsSetExt(void) const
bool Seq_annot_IsSetDesc(void) const
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
bool IsSetCit(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
SAnnotSelector & SetFeatType(TFeatType type)
Set feature type (also set annotation type to feat)
CSeq_id_Handle GetProductId(void) const
bool IsSetPartial(void) const
const CSeq_loc & GetLocation(void) const
bool GetPartial(void) const
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
const CSeq_feat_Handle & GetSeq_feat_Handle(void) const
Get original feature handle.
Definition: mapped_feat.hpp:71
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
const CSeq_loc & GetProduct(void) const
SAnnotSelector & SetLimitTSE(const CTSE_Handle &limit)
Limit annotations to those from the TSE only.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
TSeqPos size(void) const
Definition: seq_vector.hpp:291
void SetCoding(TCoding coding)
const_iterator begin(void) const
Definition: seq_vector.hpp:298
const_iterator end(void) const
Definition: seq_vector.hpp:305
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:1684
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:1392
TObjectType * GetNonNullPointer(void) const
Get pointer value and throw a null pointer exception if pointer is null.
Definition: ncbiobj.hpp:1654
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define kEmptyStr
Definition: ncbistr.hpp:123
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3452
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2984
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5424
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
Definition: ncbistr.cpp:3192
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2882
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2699
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
Definition: tempstr.hpp:334
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3305
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5406
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5347
static void TrimPrefixInPlace(string &str, const CTempString prefix, ECase use_case=eCase)
Trim prefix from a string (in-place)
Definition: ncbistr.cpp:3233
ECase
Which type of string comparison.
Definition: ncbistr.hpp:1204
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5378
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3396
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
Definition: ncbistr.cpp:3177
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2510
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
@ eCase
Case sensitive compare.
Definition: ncbistr.hpp:1205
static const char label[]
const Tdata & Get(void) const
Get the member data.
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
Definition: BioSource_.hpp:539
const TPcr_primers & GetPcr_primers(void) const
Get the Pcr_primers member data.
Definition: BioSource_.hpp:588
TGenome GetGenome(void) const
Get the Genome member data.
Definition: BioSource_.hpp:422
TOrigin GetOrigin(void) const
Get the Origin member data.
Definition: BioSource_.hpp:472
const Tdata & Get(void) const
Get the member data.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: BioSource_.hpp:497
list< CRef< CSubSource > > TSubtype
Definition: BioSource_.hpp:145
bool IsSetPcr_primers(void) const
Check if a value has been assigned to Pcr_primers data member.
Definition: BioSource_.hpp:576
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
TSubtype GetSubtype(void) const
Get the Subtype member data.
Definition: SubSource_.hpp:310
bool CanGet(void) const
Check if it is safe to call Get method.
bool CanGet(void) const
Check if it is safe to call Get method.
bool IsSetIs_focus(void) const
To distinguish biological focus. Check if a value has been assigned to Is_focus data member.
Definition: BioSource_.hpp:552
list< CRef< CPCRReaction > > Tdata
list< CRef< CPCRPrimer > > Tdata
@ eSubtype_environmental_sample
Definition: SubSource_.hpp:111
@ eSubtype_endogenous_virus_name
Definition: SubSource_.hpp:109
@ eOrigin_synthetic
purely synthetic
Definition: BioSource_.hpp:134
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool IsSetSyn(void) const
Synonyms for locus. Check if a value has been assigned to Syn data member.
Definition: Gene_ref_.hpp:756
bool IsSetFormal_name(void) const
Check if a value has been assigned to Formal_name data member.
Definition: Gene_ref_.hpp:828
const TFormal_name & GetFormal_name(void) const
Get the Formal_name member data.
Definition: Gene_ref_.hpp:840
const TSyn & GetSyn(void) const
Get the Syn member data.
Definition: Gene_ref_.hpp:768
const TDesc & GetDesc(void) const
Get the Desc member data.
Definition: Gene_ref_.hpp:599
bool IsSetPseudo(void) const
Pseudogene. Check if a value has been assigned to Pseudo data member.
Definition: Gene_ref_.hpp:681
bool IsSetLocus_tag(void) const
Systematic gene name (e.g., MI0001, ORF0069). Check if a value has been assigned to Locus_tag data mem...
Definition: Gene_ref_.hpp:781
bool IsSetLocus(void) const
Official gene symbol. Check if a value has been assigned to Locus data member.
Definition: Gene_ref_.hpp:493
bool IsSetDesc(void) const
Descriptive name. Check if a value has been assigned to Desc data member.
Definition: Gene_ref_.hpp:587
bool IsSetDb(void) const
IDs in other databases. Check if a value has been assigned to Db data member.
Definition: Gene_ref_.hpp:731
bool IsSetAllele(void) const
Official allele designation. Check if a value has been assigned to Allele data member.
Definition: Gene_ref_.hpp:540
const TDb & GetDb(void) const
Get the Db member data.
Definition: Gene_ref_.hpp:743
bool IsSetMaploc(void) const
Descriptive map location. Check if a value has been assigned to Maploc data member.
Definition: Gene_ref_.hpp:634
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
Definition: Gene_ref_.hpp:793
list< string > TSyn
Definition: Gene_ref_.hpp:102
const TLocus & GetLocus(void) const
Get the Locus member data.
Definition: Gene_ref_.hpp:505
TPseudo GetPseudo(void) const
Get the Pseudo member data.
Definition: Gene_ref_.hpp:706
const TAllele & GetAllele(void) const
Get the Allele member data.
Definition: Gene_ref_.hpp:552
const TMaploc & GetMaploc(void) const
Get the Maploc member data.
Definition: Gene_ref_.hpp:646
const TStr & GetStr(void) const
Get the variant data.
bool IsSetData(void) const
the object itself Check if a value has been assigned to Data data member.
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsSetDb(void) const
name of database or system Check if a value has been assigned to Db data member.
Definition: Dbtag_.hpp:208
bool IsSetType(void) const
type of object within class Check if a value has been assigned to Type data member.
bool IsLim(void) const
Check if variant Lim is selected.
Definition: Int_fuzz_.hpp:636
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
const TData & GetData(void) const
Get the Data member data.
bool IsSetTag(void) const
appropriate tag Check if a value has been assigned to Tag data member.
Definition: Dbtag_.hpp:255
const TFields & GetFields(void) const
Get the variant data.
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
vector< CRef< CUser_field > > TFields
E_Choice Which(void) const
Which variant is currently selected.
Definition: Object_id_.hpp:235
TLim GetLim(void) const
Get the variant data.
Definition: Int_fuzz_.hpp:642
bool IsFields(void) const
Check if variant Fields is selected.
bool IsStr(void) const
Check if variant Str is selected.
vector< CRef< CUser_object > > TObjects
bool IsSetLabel(void) const
field label Check if a value has been assigned to Label data member.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
const TData & GetData(void) const
Get the Data member data.
bool IsObjects(void) const
Check if variant Objects is selected.
const TObject & GetObject(void) const
Get the variant data.
const TLabel & GetLabel(void) const
Get the Label member data.
const TType & GetType(void) const
Get the Type member data.
bool IsObject(void) const
Check if variant Object is selected.
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
vector< CRef< CUser_field > > TData
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
const TObjects & GetObjects(void) const
Get the variant data.
@ eLim_unk
unknown
Definition: Int_fuzz_.hpp:210
const TMod & GetMod(void) const
Get the Mod member data.
Definition: OrgName_.hpp:839
bool IsSetDb(void) const
ids in taxonomic or culture dbases Check if a value has been assigned to Db data member.
Definition: Org_ref_.hpp:479
const TDiv & GetDiv(void) const
Get the Div member data.
Definition: OrgName_.hpp:1005
bool IsSetCommon(void) const
common name Check if a value has been assigned to Common data member.
Definition: Org_ref_.hpp:407
bool IsSetMod(void) const
unstructured modifiers Check if a value has been assigned to Mod data member.
Definition: Org_ref_.hpp:454
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
Definition: Org_ref_.hpp:372
const TCommon & GetCommon(void) const
Get the Common member data.
Definition: Org_ref_.hpp:419
const TDb & GetDb(void) const
Get the Db member data.
Definition: Org_ref_.hpp:491
bool IsSetDiv(void) const
GenBank division code Check if a value has been assigned to Div data member.
Definition: OrgName_.hpp:993
list< CRef< COrgMod > > TMod
Definition: OrgName_.hpp:332
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
Definition: Org_ref_.hpp:529
bool IsSetTaxname(void) const
preferred formal name Check if a value has been assigned to Taxname data member.
Definition: Org_ref_.hpp:360
const TMod & GetMod(void) const
Get the Mod member data.
Definition: Org_ref_.hpp:466
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
Definition: Org_ref_.hpp:541
@ eSubtype_biotype
Definition: OrgMod_.hpp:97
@ eSubtype_subgroup
Definition: OrgMod_.hpp:99
@ eSubtype_gb_acronym
used by taxonomy database
Definition: OrgMod_.hpp:115
@ eSubtype_gb_synonym
used by taxonomy database
Definition: OrgMod_.hpp:117
@ eSubtype_substrain
Definition: OrgMod_.hpp:86
@ eSubtype_anamorph
Definition: OrgMod_.hpp:112
@ eSubtype_pathovar
Definition: OrgMod_.hpp:94
@ eSubtype_other
ASN5: old-name (254) will be added to next spec.
Definition: OrgMod_.hpp:125
@ eSubtype_dosage
chromosome dosage of hybrid
Definition: OrgMod_.hpp:103
@ eSubtype_authority
Definition: OrgMod_.hpp:107
@ eSubtype_sub_species
Definition: OrgMod_.hpp:105
@ eSubtype_nat_host
natural host of this specimen
Definition: OrgMod_.hpp:104
@ eSubtype_cultivar
Definition: OrgMod_.hpp:93
@ eSubtype_variety
Definition: OrgMod_.hpp:89
@ eSubtype_strain
Definition: OrgMod_.hpp:85
@ eSubtype_metagenome_source
Definition: OrgMod_.hpp:120
@ eSubtype_biovar
Definition: OrgMod_.hpp:96
@ eSubtype_old_name
Definition: OrgMod_.hpp:124
@ eSubtype_subtype
Definition: OrgMod_.hpp:88
@ eSubtype_teleomorph
Definition: OrgMod_.hpp:113
@ eSubtype_serogroup
Definition: OrgMod_.hpp:91
@ eSubtype_synonym
Definition: OrgMod_.hpp:111
@ eSubtype_group
Definition: OrgMod_.hpp:98
@ eSubtype_type_material
Definition: OrgMod_.hpp:121
@ eSubtype_acronym
Definition: OrgMod_.hpp:102
@ eSubtype_specimen_voucher
Definition: OrgMod_.hpp:106
@ eSubtype_serotype
Definition: OrgMod_.hpp:90
@ eSubtype_chemovar
Definition: OrgMod_.hpp:95
@ eSubtype_serovar
Definition: OrgMod_.hpp:92
@ eSubtype_bio_material
Definition: OrgMod_.hpp:119
@ eSubtype_gb_anamorph
used by taxonomy database
Definition: OrgMod_.hpp:116
@ eSubtype_culture_collection
Definition: OrgMod_.hpp:118
@ eSubtype_ecotype
Definition: OrgMod_.hpp:110
@ eSubtype_forma_specialis
Definition: OrgMod_.hpp:109
@ eSubtype_old_lineage
Definition: OrgMod_.hpp:123
@ eSubtype_isolate
Definition: OrgMod_.hpp:100
bool IsSetDesc(void) const
description (instead of name) Check if a value has been assigned to Desc data member.
Definition: Prot_ref_.hpp:391
EProcessed
processing status
Definition: Prot_ref_.hpp:95
list< string > TName
Definition: Prot_ref_.hpp:108
const TActivity & GetActivity(void) const
Get the Activity member data.
Definition: Prot_ref_.hpp:475
list< string > TActivity
Definition: Prot_ref_.hpp:111
const TName & GetName(void) const
Get the Name member data.
Definition: Prot_ref_.hpp:378
bool IsSetEc(void) const
E.C.
Definition: Prot_ref_.hpp:438
list< string > TEc
Definition: Prot_ref_.hpp:110
TProcessed GetProcessed(void) const
Get the Processed member data.
Definition: Prot_ref_.hpp:538
bool IsSetName(void) const
protein name Check if a value has been assigned to Name data member.
Definition: Prot_ref_.hpp:366
const TDesc & GetDesc(void) const
Get the Desc member data.
Definition: Prot_ref_.hpp:403
bool IsSetActivity(void) const
activities Check if a value has been assigned to Activity data member.
Definition: Prot_ref_.hpp:463
const TEc & GetEc(void) const
Get the Ec member data.
Definition: Prot_ref_.hpp:450
@ eProcessed_signal_peptide
Definition: Prot_ref_.hpp:99
@ eProcessed_transit_peptide
Definition: Prot_ref_.hpp:100
const TQuals & GetQuals(void) const
Get the Quals member data.
Definition: RNA_gen_.hpp:353
const TAnticodon & GetAnticodon(void) const
Get the Anticodon member data.
Definition: Trna_ext_.hpp:649
TNcbi8aa GetNcbi8aa(void) const
Get the variant data.
Definition: Trna_ext_.hpp:543
const TAa & GetAa(void) const
Get the Aa member data.
Definition: Trna_ext_.hpp:603
TNcbistdaa GetNcbistdaa(void) const
Get the variant data.
Definition: Trna_ext_.hpp:570
E_Choice Which(void) const
Which variant is currently selected.
Definition: RNA_ref_.hpp:449
bool IsSetAa(void) const
Check if a value has been assigned to Aa data member.
Definition: Trna_ext_.hpp:591
bool IsTRNA(void) const
Check if variant TRNA is selected.
Definition: RNA_ref_.hpp:498
bool IsSetAnticodon(void) const
location of anticodon Check if a value has been assigned to Anticodon data member.
Definition: Trna_ext_.hpp:637
bool IsNcbieaa(void) const
Check if variant Ncbieaa is selected.
Definition: Trna_ext_.hpp:510
EType
type of RNA feature
Definition: RNA_ref_.hpp:95
bool IsSetExt(void) const
generic fields for ncRNA, tmRNA, miscRNA Check if a value has been assigned to Ext data member.
Definition: RNA_ref_.hpp:604
TNcbieaa GetNcbieaa(void) const
Get the variant data.
Definition: Trna_ext_.hpp:516
bool IsSetCodon(void) const
codon(s) as in Genetic-code Check if a value has been assigned to Codon data member.
Definition: Trna_ext_.hpp:612
bool IsGen(void) const
Check if variant Gen is selected.
Definition: RNA_ref_.hpp:504
TIupacaa GetIupacaa(void) const
Get the variant data.
Definition: Trna_ext_.hpp:489
bool IsSetQuals(void) const
e.g., tag_peptide qualifier for tmRNAs Check if a value has been assigned to Quals data member.
Definition: RNA_gen_.hpp:341
const TGen & GetGen(void) const
Get the variant data.
Definition: RNA_ref_.cpp:156
const TName & GetName(void) const
Get the variant data.
Definition: RNA_ref_.hpp:484
bool IsSetClass(void) const
for ncRNAs, the class of non-coding RNA: examples: antisense_RNA, guide_RNA, snRNA Check if a value h...
Definition: RNA_gen_.hpp:247
E_Choice Which(void) const
Which variant is currently selected.
Definition: Trna_ext_.hpp:454
const TExt & GetExt(void) const
Get the Ext member data.
Definition: RNA_ref_.hpp:616
const TTRNA & GetTRNA(void) const
Get the variant data.
Definition: RNA_ref_.cpp:134
bool IsName(void) const
Check if variant Name is selected.
Definition: RNA_ref_.hpp:478
const TClass & GetClass(void) const
Get the Class member data.
Definition: RNA_gen_.hpp:259
@ e_Name
for naming "other" type
Definition: RNA_ref_.hpp:134
const TVal & GetVal(void) const
Get the Val member data.
Definition: Gb_qual_.hpp:259
const TKey & GetKey(void) const
Get the Key member data.
Definition: Imp_feat_.hpp:259
bool IsSetLoc(void) const
original location string Check if a value has been assigned to Loc data member.
Definition: Imp_feat_.hpp:294
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
Definition: Seq_feat_.hpp:1037
vector< CRef< CDbtag > > TDbxref
Definition: Seq_feat_.hpp:123
EPsec_str
protein secondary structure
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetCode(void) const
genetic code used Check if a value has been assigned to Code data member.
Definition: Cdregion_.hpp:700
bool IsBond(void) const
Check if variant Bond is selected.
bool IsProt(void) const
Check if variant Prot is selected.
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
bool IsCdregion(void) const
Check if variant Cdregion is selected.
bool IsImp(void) const
Check if variant Imp is selected.
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Seq_feat_.hpp:1147
bool IsSetKey(void) const
Check if a value has been assigned to Key data member.
Definition: Imp_feat_.hpp:247
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
E_Choice
Choice variants.
bool IsSetConflict(void) const
conflict Check if a value has been assigned to Conflict data member.
Definition: Cdregion_.hpp:559
list< CRef< CCode_break > > TCode_break
Definition: Cdregion_.hpp:111
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:534
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TCode & GetCode(void) const
Get the Code member data.
Definition: Cdregion_.hpp:712
const TDbxref & GetDbxref(void) const
Get the Dbxref member data.
Definition: Seq_feat_.hpp:1333
bool IsPsec_str(void) const
Check if variant Psec_str is selected.
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
bool IsHet(void) const
Check if variant Het is selected.
const TCdregion & GetCdregion(void) const
Get the variant data.
TPseudo GetPseudo(void) const
Get the Pseudo member data.
Definition: Seq_feat_.hpp:1365
bool IsSetQual(void) const
Check if a value has been assigned to Qual data member.
Definition: Gb_qual_.hpp:200
bool IsSetPseudo(void) const
annotated on pseudogene? Check if a value has been assigned to Pseudo data member.
Definition: Seq_feat_.hpp:1346
const TComment & GetComment(void) const
Get the Comment member data.
Definition: Seq_feat_.hpp:1049
bool IsVariation(void) const
Check if variant Variation is selected.
const TGene & GetGene(void) const
Get the variant data.
const TProt & GetProt(void) const
Get the variant data.
bool IsSite(void) const
Check if variant Site is selected.
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Gb_qual_.hpp:212
const TRna & GetRna(void) const
Get the variant data.
bool IsNon_std_residue(void) const
Check if variant Non_std_residue is selected.
bool IsSetDbxref(void) const
support for xref to other databases Check if a value has been assigned to Dbxref data member.
Definition: Seq_feat_.hpp:1321
const TCode_break & GetCode_break(void) const
Get the Code_break member data.
Definition: Cdregion_.hpp:733
bool IsSetVal(void) const
Check if a value has been assigned to Val data member.
Definition: Gb_qual_.hpp:247
const TLoc & GetLoc(void) const
Get the Loc member data.
Definition: Imp_feat_.hpp:306
bool IsRna(void) const
Check if variant Rna is selected.
bool IsRegion(void) const
Check if variant Region is selected.
TConflict GetConflict(void) const
Get the Conflict member data.
Definition: Cdregion_.hpp:578
bool IsSetCode_break(void) const
individual exceptions Check if a value has been assigned to Code_break data member.
Definition: Cdregion_.hpp:721
const TImp & GetImp(void) const
Get the variant data.
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
Definition: Cdregion_.hpp:509
@ e_Het
cofactor, prosthetic grp, etc, bound to seq
@ e_Region
named region (globin locus)
@ e_Comment
just a comment
@ e_Non_std_residue
non-standard residue here in seq
@ eExp_ev_experimental
any reasonable experimental check
Definition: Seq_feat_.hpp:102
@ eExp_ev_not_experimental
similarity, pattern, etc
Definition: Seq_feat_.hpp:103
@ eFrame_not_set
not set, code uses one
Definition: Cdregion_.hpp:95
@ eFrame_three
reading frame
Definition: Cdregion_.hpp:98
@ e_Ncbi8aa
NCBI8aa code.
@ e_Ncbieaa
ASCII value of NCBIeaa code.
void SetTo(TTo value)
Assign a value to To data member.
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
void SetId(TId &value)
Assign a value to Id data member.
TFrom GetFrom(void) const
Get the From member data.
bool IsGeneral(void) const
Check if variant General is selected.
Definition: Seq_id_.hpp:877
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
void SetFrom(TFrom value)
Assign a value to From data member.
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
E_Choice
Choice variants.
Definition: Seq_id_.hpp:93
const TGeneral & GetGeneral(void) const
Get the variant data.
Definition: Seq_id_.cpp:369
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ e_Other
for historical reasons, 'other' = 'refseq'
Definition: Seq_id_.hpp:104
@ e_Gpipe
Internal NCBI genome pipeline processing ID.
Definition: Seq_id_.hpp:113
@ e_Tpe
Third Party Annot/Seq EMBL.
Definition: Seq_id_.hpp:111
@ e_Tpd
Third Party Annot/Seq DDBJ.
Definition: Seq_id_.hpp:112
@ e_General
for other databases
Definition: Seq_id_.hpp:105
@ e_Ddbj
DDBJ.
Definition: Seq_id_.hpp:107
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
@ e_Prf
PRF SEQDB.
Definition: Seq_id_.hpp:108
@ e_not_set
No variant selected.
Definition: Seq_id_.hpp:94
@ e_Tpg
Third Party Annot/Seq Genbank.
Definition: Seq_id_.hpp:110
@ e_Local
local use
Definition: Seq_id_.hpp:95
@ e_Null
not placed
Definition: Seq_loc_.hpp:98
@ e_Int
from to
Definition: Seq_loc_.hpp:101
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
const Tdata & Get(void) const
Get the member data.
const TIupacna & GetIupacna(void) const
Get the variant data.
Definition: Seq_data_.hpp:510
E_Choice
Choice variants.
Definition: Seq_data_.hpp:102
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
TTech GetTech(void) const
Get the Tech member data.
Definition: MolInfo_.hpp:497
TLength GetLength(void) const
Get the Length member data.
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
bool CanGetSeq_data(void) const
Check if it is safe to call GetSeq_data method.
bool IsSetId(void) const
equivalent identifiers Check if a value has been assigned to Id data member.
Definition: Bioseq_.hpp:278
const TComment & GetComment(void) const
Get the variant data.
Definition: Seqdesc_.hpp:1058
const TMolinfo & GetMolinfo(void) const
Get the variant data.
Definition: Seqdesc_.cpp:588
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
list< CRef< CAnnotdesc > > Tdata
@ eTech_concept_trans
conceptual translation
Definition: MolInfo_.hpp:131
@ eTech_standard
standard sequencing
Definition: MolInfo_.hpp:124
@ eTech_concept_trans_a
conceptual transl. supplied by author
Definition: MolInfo_.hpp:136
@ e_Ncbieaa
extended ASCII 1 letter aa codes
Definition: Seq_data_.hpp:111
@ e_Iupacna
IUPAC 1 letter nuc acid code.
Definition: Seq_data_.hpp:104
@ eBiomol_transcribed_RNA
transcribed RNA other than existing classes
Definition: MolInfo_.hpp:113
@ e_Comment
a more extensive comment
Definition: Seqdesc_.hpp:117
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
const TLiteral & GetLiteral(void) const
Get the variant data.
list< CRef< CDelta_item > > TDelta
bool IsLiteral(void) const
Check if variant Literal is selected.
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
yy_size_t n
static void text(MDB_val *v)
Definition: mdb_dump.c:62
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
mdb_mode_t mode
Definition: lmdb++.h:38
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
static const BitmapCharRec ch1
Definition: ncbi_10x20.c:1827
static const BitmapCharRec ch2
Definition: ncbi_10x20.c:1819
ESERV_Site site
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
int toupper(Uchar c)
Definition: ncbictype.hpp:73
#define nullptr
Definition: ncbimisc.hpp:45
T min(T x_, T y_)
Int mod(Int i, Int j)
Definition: njn_integer.hpp:67
Int4 delta(size_t dimension_, const Int4 *score_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
The Object manager core.
bool TrimSpacesAndJunkFromEnds(string &str, bool allow_ellipsis=false)
Definition: objutil.cpp:475
bool RemovePeriodFromEnd(string &str, bool keep_ellipsis=true)
Definition: objutil.cpp:299
bool IsValidAccession(const string &accn, EAccValFlag flag=eValidateAcc)
Definition: objutil.cpp:1227
ETildeStyle
Definition: objutil.hpp:47
@ eTilde_newline
Definition: objutil.hpp:50
@ eTilde_tilde
Definition: objutil.hpp:48
@ eTilde_space
Definition: objutil.hpp:49
@ eTilde_note
Definition: objutil.hpp:52
void ExpandTildes(string &s, ETildeStyle style)
Definition: objutil.cpp:152
@ eValidateAccDotVer
Definition: objutil.hpp:97
const char * GetAAName(unsigned char aa, bool is_ascii)
Definition: objutil.cpp:1559
const char * strLinkBasePubmed
Definition: objutil.cpp:1648
void JoinString(string &to, const string &prefix, const string &str, bool noRedundancy=true)
Definition: objutil.cpp:1050
bool CommentHasSuspiciousHtml(const string &str)
Definition: objutil.cpp:1879
void AddPeriod(string &str)
Definition: objutil.cpp:377
const string & GetTechString(int tech)
Definition: objutil.cpp:1364
void ConvertQuotes(string &str)
Definition: objutil.cpp:240
#define Loc
static BOOL number
Definition: pcre2grep.c:285
#define FOR_EACH_SEQID_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQID_ON_BIOSEQ / EDIT_EACH_SEQID_ON_BIOSEQ.
Definition: seq_macros.hpp:308
#define FOR_EACH_GBQUAL_ON_SEQFEAT(Itr, Var)
FOR_EACH_GBQUAL_ON_SEQFEAT / EDIT_EACH_GBQUAL_ON_SEQFEAT.
#define FOR_EACH_STRING_IN_VECTOR(Itr, Var)
FOR_EACH_STRING_IN_VECTOR / EDIT_EACH_STRING_IN_VECTOR.
#define FIELD_IS_SET(Var, Fld)
FIELD_IS_SET base macro.
#define GET_STRING_FLD_OR_BLANK(Var, Fld)
GET_STRING_FLD_OR_BLANK base macro.
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
CRef< objects::CObjectManager > om
SAnnotSelector –.
EFeatureQualifier m_Value
const char * m_Name
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...
Definition: static_set.hpp:60
Definition: type.c:6
#define _ASSERT
#define local
Definition: zutil.h:33
Modified on Fri Sep 20 14:57:52 2024 by modify_doxy.py rev. 669887