NCBI C++ ToolKit
feature_item.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: feature_item.cpp 101909 2024-03-01 12:11:21Z stakhovv $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aaron Ucko, NCBI
27 * Mati Shomrat
28 * Maintainer: Frank Ludwig
29 *
30 * File Description:
31 * new (early 2003) flat-file generator -- representation of features
32 * (mainly of interest to implementors)
33 *
34 *
35 * WHEN EDITING THE LIST OF QUALIFIERS:
36 *
37 * - there is currently a lot of parallel logic for the FTable case
38 * (CFeatureItem::x_AddFTableQuals()) and the standard case
39 * (CFeatureItem::x_Add...Quals()). Make sure to edit both cases as
40 * appropriate.
41 * ===========================================================================
42 */
43 #include <ncbi_pch.hpp>
44 #include <corelib/ncbistd.hpp>
45 #include <serial/iterator.hpp>
46 #include <serial/enumvalues.hpp>
47 
48 #include <algorithm>
49 #include <sstream>
50 
51 #include <objects/seq/Bioseq.hpp>
53 #include <objects/seq/MolInfo.hpp>
89 
90 #include <objmgr/scope.hpp>
92 #include <objmgr/seqdesc_ci.hpp>
93 #include <objmgr/seq_vector.hpp>
94 #include <objmgr/util/sequence.hpp>
95 #include <objmgr/util/feature.hpp>
96 #include <objmgr/util/weight.hpp>
98 
99 #include <util/static_set.hpp>
100 #include <util/static_map.hpp>
101 #include <util/sequtil/sequtil.hpp>
103 
104 #include <algorithm>
111 #include <objmgr/util/objutil.hpp>
112 #include "inst_info_map.hpp"
113 
114 // On Mac OS X 10.3, FixMath.h defines ff as a one-argument macro(!)
115 #ifdef ff
116 # undef ff
117 #endif
118 
121 USING_SCOPE(sequence);
122 
124 {
125 public:
126  bool operator() ( const CConstRef<CFlatGoQVal> &obj1, const CConstRef<CFlatGoQVal> &obj2 )
127  {
128  const CFlatGoQVal *qval1 = obj1.GetNonNullPointer();
129  const CFlatGoQVal *qval2 = obj2.GetNonNullPointer();
130 
131  // sort by text string
132  const string &str1 = qval1->GetTextString();
133  const string &str2 = qval2->GetTextString();
134 
135  int textComparison = 0;
136 
137  // This whole paragraph should eventually be replaced with a mere NStr::CompareNocase stored into textComparison
138  // We can't just use NStr::CompareNocase, because that compares using tolower, whereas
139  // we must compare with toupper to maintain compatibility with C.
140  SIZE_TYPE pos = 0;
141  const SIZE_TYPE min_length = min( str1.length(), str2.length() );
142  for( ; pos < min_length; ++pos ) {
143  textComparison = toupper( str1[pos] ) - toupper( str2[pos] );
144  if( textComparison != 0 ) {
145  break;
146  }
147  }
148  if( 0 == textComparison ) {
149  // if we reached the end, compare via length (shorter first)
150  textComparison = str1.length() - str2.length();
151  }
152 
153  // compare by text, if possible
154  if( textComparison < 0 ) {
155  return true;
156  } else if( textComparison > 0 ) {
157  return false;
158  }
159 
160  // if text is tied, then sort by pubmed id, if any
161  int pmid1 = qval1->GetPubmedId();
162  int pmid2 = qval2->GetPubmedId();
163 
164  if( 0 == pmid1 ) {
165  return false;
166  } else if( 0 == pmid2 ) {
167  return true;
168  } else {
169  return pmid1 < pmid2;
170  }
171  }
172 };
173 
174 // -- static functions
175 
176 static bool s_ValidId(const CSeq_id& id)
177 {
178  return id.IsGenbank() || id.IsEmbl() || id.IsDdbj() ||
179  id.IsOther() || id.IsPatent() ||
180  id.IsTpg() || id.IsTpe() || id.IsTpd() ||
181  id.IsGpipe();
182 }
183 
184 static
186  const string &s1, const string &s2,
187  NStr::ECase use_case )
188 {
189  if( s1.empty() || s2.empty() ) {
190  return s1.empty() && s2.empty();
191  }
192 
193  // set length to disregard final period, if any
194  size_t s1_len = s1.length();
195  if( s1[s1_len-1] == '.' ) {
196  --s1_len;
197  }
198  size_t s2_len = s2.length();
199  if( s2[s2_len-1] == '.' ) {
200  --s2_len;
201  }
202 
203  if( s1_len != s2_len ) {
204  return false;
205  }
206 
207  // NStr::Equal does not have exactly the function I want,
208  // so I have to make my own.
209  for( size_t ii = 0; ii < s1_len ; ++ii ) {
210  const char ch1 = ( use_case == NStr::eNocase ? toupper(s1[ii]) : s1[ii] );
211  const char ch2 = ( use_case == NStr::eNocase ? toupper(s2[ii]) : s2[ii] );
212  if( ch1 != ch2 ) {
213  return false;
214  }
215  }
216  return true;
217 }
218 
219 static bool s_CheckQuals_cdregion(const CMappedFeat& feat,
220  const CSeq_loc& loc,
222 {
223  if ( !ctx.Config().CheckCDSProductId() ) {
224  return true;
225  }
226 
227  CScope& scope = ctx.GetScope();
228 
229  // non-pseudo CDS must have /product
230  bool pseudo = feat.IsSetPseudo() && feat.GetPseudo() ;
231  if ( !pseudo && !ctx.IsEMBL() && !ctx.IsDDBJ() ) {
232  const CGene_ref* grp = feat.GetGeneXref();
233  if (! grp) {
234  CConstRef<CSeq_feat> gene = GetOverlappingGene(loc, scope);
235  if (gene) {
236  pseudo = gene->IsSetPseudo() && gene->GetPseudo();
237  if ( !pseudo ) {
238  grp = &(gene->GetData().GetGene());
239  }
240  }
241  }
242  if (! pseudo && grp) {
243  pseudo = grp->GetPseudo();
244  }
245  }
246 
247  bool just_stop = false;
248  const CSeq_loc& Loc = feat.GetLocation();
250  if ( GetLength(Loc, &scope) <= 5 ) {
251  just_stop = true;
252  }
253  }
254 
255  if ( pseudo || just_stop ) {
256  return true;
257  }
258 
259  // make sure the product has a valid accession
260  if (feat.IsSetProduct()) {
262  try {
263  id.Reset(&(GetId(feat.GetProduct(), &scope)));
264  } catch ( CException& ) {
265  id.Reset();
266  }
267  if (id) {
268  if ((id->IsGi() && id->GetGi() > ZERO_GI) || id->IsLocal()) {
269  CBioseq_Handle prod = scope.GetBioseqHandleFromTSE(*id, ctx.GetHandle());
270  if (prod) {
271  ITERATE (CBioseq_Handle::TId, it, prod.GetId()) {
272  if (s_ValidId(*it->GetSeqId())) {
273  CConstRef<CTextseq_id> tsip(it->GetSeqId()->GetTextseq_Id());
274  if (tsip && tsip->IsSetAccession() &&
275  IsValidAccession(tsip->GetAccession())) {
276  return true;
277  }
278  }
279  }
280  } else if (id->IsGi() && id->GetGi() > ZERO_GI) {
281  // RELEASE_MODE requires that /protein_id is an accession
282  if (ctx.Config().IsModeRelease()) {
283  try {
284  if (IsValidAccession(GetAccessionForGi(id->GetGi(), scope))) {
285  return true;
286  }
287  } catch (CException&) {
288  }
289  }
290  }
291  } else if (s_ValidId(*id)) {
293  if (tsip && tsip->IsSetAccession() &&
294  IsValidAccession(tsip->GetAccession())) {
295  return true;
296  }
297  }
298  }
299  } else { // no product
300  if (feat.IsSetExcept() && feat.GetExcept() &&
301  feat.IsSetExcept_text() ) {
302  if (NStr::Find(feat.GetExcept_text(),
303  "rearrangement required for product") != NPOS) {
304  return true;
305  }
306  }
307  }
308 
309  return false;
310 }
311 
312 
313 
314 static bool s_HasPub(const CMappedFeat& feat, CBioseqContext& ctx)
315 {
316  ITERATE(CBioseqContext::TReferences, it, ctx.GetReferences()) {
317  if ((*it)->Matches(feat.GetCit())) {
318  return true;
319  }
320  }
321 
322  return false;
323 }
324 
325 
327 {
328  // check for /compare
329  if (!NStr::IsBlank(feat.GetNamedQual("compare"))) {
330  return true;
331  }
332 
333  // check for /citation
334  if (feat.IsSetCit()) {
335  return s_HasPub(feat, ctx);
336  }
337 
338  return false;
339 }
340 
341 
342 // conflict requires /citation or /compare
344 {
345  // RefSeq allows conflict with accession in comment instead of sfp->cit
346  if (ctx.IsRefSeq() &&
347  feat.IsSetComment() && !NStr::IsBlank(feat.GetComment())) {
348  return true;
349  }
350 
351  return s_HasCompareOrCitation(feat, ctx);
352 }
353 
354 // old_sequence requires /citation or /compare
356 {
357  return s_HasCompareOrCitation(feat, ctx);
358 }
359 
360 
361 static bool s_CheckQuals_gene(const CMappedFeat& feat)
362 {
363  // gene requires /gene or /locus_tag, but desc or syn can be mapped to /gene
364  const CSeqFeatData::TGene& gene = feat.GetData().GetGene();
365  if ( (gene.IsSetLocus() && !gene.GetLocus().empty()) ||
366  (gene.IsSetLocus_tag() && !gene.GetLocus_tag().empty()) ||
367  (gene.IsSetDesc() && !gene.GetDesc().empty()) ||
368  (!gene.GetSyn().empty() && !gene.GetSyn().front().empty()) ) {
369  return true;
370  }
371 
372  return false;
373 }
374 
375 
376 static bool s_CheckQuals_bind(const CMappedFeat& feat)
377 {
378  // protein_bind or misc_binding require eFQ_bound_moiety
379  return !NStr::IsBlank(feat.GetNamedQual("bound_moiety"));
380 }
381 
382 
383 static bool s_CheckQuals_mod_base(const CMappedFeat& feat)
384 {
385  // modified_base requires eFQ_mod_base
386  return !NStr::IsBlank(feat.GetNamedQual("mod_base"));
387 }
388 
389 
390 static bool s_CheckQuals_gap(const CMappedFeat& feat)
391 {
392  // gap feature must have /estimated_length qual
393  return !feat.GetNamedQual("estimated_length").empty();
394 }
395 
396 static bool s_CheckQuals_assembly_gap(const CMappedFeat& feat)
397 {
398  // assembly_gap feature must have /estimated_length qual
399  // and /gap_type
400  return ! feat.GetNamedQual("estimated_length").empty() &&
401  ! feat.GetNamedQual("gap_type").empty();
402 }
403 
404 
405 static bool s_CheckQuals_ncRNA(const CMappedFeat& feat)
406 {
407  if( !NStr::IsBlank(feat.GetNamedQual("ncRNA_class")) ) {
408  return true;
409  }
410 
411  // Look at this mess; if only we could use sequence_macros.hpp
412  if( feat.GetData().GetRna().IsSetExt() &&
413  feat.GetData().GetRna().GetExt().IsGen() &&
414  feat.GetData().GetRna().GetExt().GetGen().IsSetClass() &&
415  !NStr::IsBlank(feat.GetData().GetRna().GetExt().GetGen().GetClass()) )
416  {
417  return true;
418  }
419 
420  return false;
421 }
422 
423 
424 static bool s_CheckQuals_regulatory(const CMappedFeat& feat)
425 {
426  // regulatory feature must have /regulatory_class qual
427  return ! feat.GetNamedQual("regulatory_class").empty();
428 }
429 
430 
431 static bool s_CheckMandatoryQuals(const CMappedFeat& feat,
432  const CSeq_loc& loc,
434 {
435  switch ( feat.GetData().GetSubtype() ) {
437  {
438  return s_CheckQuals_cdregion(feat, loc, ctx);
439  }
441  {
442  return s_CheckQuals_conflict(feat, ctx);
443  }
445  {
446  return s_CheckQuals_old_seq(feat, ctx);
447  }
449  {
450  return s_CheckQuals_gene(feat);
451  }
454  {
455  return s_CheckQuals_bind(feat);
456  }
458  {
459  return s_CheckQuals_mod_base(feat);
460  }
462  {
463  return s_CheckQuals_gap(feat);
464  }
466  {
467  return s_CheckQuals_assembly_gap(feat);
468  }
470  {
471  return s_CheckQuals_ncRNA(feat);
472  }
474  {
475  return s_CheckQuals_regulatory(feat);
476  }
477  default:
478  break;
479  }
480 
481  return true;
482 }
483 
484 static bool s_SkipFeature(const CMappedFeat& feat,
485  const CSeq_loc& loc,
487 {
489  CSeqFeatData::ESubtype subtype = feat.GetData().GetSubtype();
490 
491  if ( subtype == CSeqFeatData::eSubtype_pub ||
492  /* subtype == CSeqFeatData::eSubtype_non_std_residue || */
493  subtype == CSeqFeatData::eSubtype_biosrc ||
494  subtype == CSeqFeatData::eSubtype_rsite ||
495  subtype == CSeqFeatData::eSubtype_seq ) {
496  return true;
497  }
498 
499  const CFlatFileConfig& cfg = ctx.Config();
500 
501  // check feature customization flags
502  if ( cfg.ValidateFeatures() &&
503  (subtype == CSeqFeatData::eSubtype_bad ||
504  subtype == CSeqFeatData::eSubtype_virion) ) {
505  return true;
506  }
507 
508  if ( cfg.ValidateFeatures() && type == CSeqFeatData::e_Imp ) {
509  switch ( subtype ) {
510  default:
511  break;
517  return true;
518  }
519  }
520 
521  if ( ctx.IsNuc() && subtype == CSeqFeatData::eSubtype_het ) {
522  return true;
523  }
524 
525  if ( cfg.HideImpFeatures() && type == CSeqFeatData::e_Imp ) {
526  return true;
527  }
528 
529  if ( cfg.HideMiscFeatures() ) {
530  if ( type == CSeqFeatData::e_Site ||
535  subtype == CSeqFeatData::eSubtype_preprotein ) {
536  return true;
537  }
538  }
539 
540  if ( cfg.HideExonFeatures() && subtype == CSeqFeatData::eSubtype_exon ) {
541  return true;
542  }
543 
544  if ( cfg.IsPolicyGenomes() && subtype == CSeqFeatData::eSubtype_exon &&
545  (ctx.GetBiomol() == CMolInfo::eBiomol_mRNA || ctx.GetBiomol() == CMolInfo::eBiomol_transcribed_RNA) ) {
546  return true;
547  }
548 
549  if ( cfg.HideIntronFeatures() && subtype == CSeqFeatData::eSubtype_intron ) {
550  return true;
551  }
552 
553  if ( cfg.HideRemoteImpFeatures() && type == CSeqFeatData::e_Imp ) {
554  if ( subtype == CSeqFeatData::eSubtype_variation ||
555  subtype == CSeqFeatData::eSubtype_exon ||
556  subtype == CSeqFeatData::eSubtype_intron ||
558  return true;
559  }
560  }
561 
563  const CSeq_feat::TDbxref& dbxref = feat.GetDbxref();
564  ITERATE (CSeq_feat::TDbxref, it, dbxref) {
565  const CDbtag& dbt = **it;
566  if ( dbt.IsSetDb() && !dbt.GetDb().empty() && dbt.GetDb() == "dbSNP") {
567  return true;
568  }
569  }
570  }
571 
572  if ( cfg.GeneRNACDSFeatures() ) {
573  if ( type != CSeqFeatData::e_Gene &&
576  return true;
577  }
578  }
579 
580  // skip genes in DDBJ format
581  if ( cfg.IsFormatDDBJ() && type == CSeqFeatData::e_Gene ) {
582  return true;
583  }
584 
585  // if RELEASE mode, make sure we have all info to create mandatory quals.
586  if ( cfg.NeedRequiredQuals() ) {
587  return !s_CheckMandatoryQuals(feat, loc, ctx);
588  }
589 
590  return false;
591 }
592 
594 public:
595  bool operator()( const char ch )
596  {
597  return( ! isdigit(ch) && ch != '.' && ch != '-' );
598  }
599 };
600 
/// Checks whether @a ec_number has the shape of a four-field EC number.
///
/// Fields are separated by '.', and exactly three periods are required.
/// Typical accepted shapes:
///     num.num.num.num
///     num.num.num.-
///     num.num.-.-
///     num.-.-.-
///     -.-.-.-
/// ("n" may be used instead of "-" as the ambiguity marker.)
///
/// Rules enforced by the scan:
///  - a field is either digits only or exactly one ambiguity marker;
///  - once an ambiguity marker has been seen, no digit may follow;
///  - in the fourth field, an 'n' that comes before any digit of that field
///    and is immediately followed by a digit (e.g. "1.2.3.n4") is ignored
///    rather than counted as an ambiguity marker.
/// Characters other than digits, '-', 'n' and '.' are skipped; they do not
/// by themselves make the string illegal.
///
/// @param ec_number  Candidate EC number text.
/// @return           true if the text satisfies the rules above; false
///                   otherwise (including for an empty string).
static bool s_IsLegalECNumber(const string& ec_number)
{
    if (ec_number.empty()) {
        return false;
    }

    // The <cctype> classifiers are only defined for EOF and values that fit
    // in unsigned char; passing a negative plain char (any non-ASCII byte)
    // is undefined behaviour, so convert first.
    const auto is_digit = [](char ch) {
        return isdigit(static_cast<unsigned char>(ch)) != 0;
    };

    bool is_ambig   = false;  // an ambiguity marker has been seen somewhere
    int  numperiods = 0;      // periods seen so far
    int  numdigits  = 0;      // digits in the current field
    int  numdashes  = 0;      // ambiguity markers in the current field

    const string::const_iterator ec_end = ec_number.end();
    for (string::const_iterator ec_iter = ec_number.begin();
         ec_iter != ec_end; ++ec_iter) {
        const char ch = *ec_iter;

        if (is_digit(ch)) {
            ++numdigits;
            if (is_ambig) {
                return false;
            }
        } else if (ch == '-') {
            ++numdashes;
            is_ambig = true;
        } else if (ch == 'n') {
            // allow/ignore n in first position of fourth number to not mean
            // ambiguous, if followed by digit
            const string::const_iterator ec_next = ec_iter + 1;
            const bool n_is_ignored =
                ec_next != ec_end &&
                numperiods == 3 &&
                numdigits == 0 &&
                is_digit(*ec_next);
            if (!n_is_ignored) {
                ++numdashes;
                is_ambig = true;
            }
        } else if (ch == '.') {
            ++numperiods;
            // Validate the field that just ended.
            if (numdigits > 0 && numdashes > 0) {
                return false;   // digits mixed with a marker
            }
            if (numdigits == 0 && numdashes == 0) {
                return false;   // empty field
            }
            if (numdashes > 1) {
                return false;   // more than one marker
            }
            numdigits = 0;
            numdashes = 0;
        }
    }

    // Validate the overall shape and the last field.
    if (numperiods != 3) {
        return false;
    }
    if (numdigits > 0 && numdashes > 0) {
        return false;
    }
    return numdigits > 0 || numdashes == 1;
}
650 
651 
652 static const string& s_GetBondName(CSeqFeatData::TBond bond)
653 {
654  static const string kOther = "unclassified";
655  return (bond == CSeqFeatData::eBond_other) ? kOther :
656  CSeqFeatData::ENUM_METHOD_NAME(EBond)()->FindName(bond, true);
657 }
658 
659 static void s_QualVectorToNote(
660  const CFlatFeature::TQuals& qualVector,
661  bool noRedundancy,
662  string& note,
663  string& punctuation,
664  bool& addPeriod)
665 {
666  // is there at least one note which is more than blank or a period?
667  bool hasSubstantiveNote = false;
668  // store this so we can chop off the extra stuff we added if there was no note of substance
669  const string::size_type original_length = note.length();
670 
671  string prefix;
672  ITERATE (CFlatFeature::TQuals, it, qualVector) {
673  const string& qual = (*it)->GetValue();
674 
675  prefix.erase();
676  if ( !note.empty() ) {
677  prefix = punctuation;
678  const string& next_prefix = (*it)->GetPrefix();
679  if (!NStr::EndsWith(prefix, '\n') ) {
680  prefix += next_prefix;
681  }
682  }
683 
684  if( !qual.empty() && qual != "." ) {
685  hasSubstantiveNote = true;
686  }
687 
688  // A qual may declare that it be shown even if redundant and override the
689  // given noRedundancy variable
690  const bool noRedundancyThisIteration =
691  ( 0 != ( (*it)->GetFlags() & CFormatQual::fFlags_showEvenIfRedund ) ? false : noRedundancy );
692  JoinString(note, prefix, qual, noRedundancyThisIteration );
693 
694  addPeriod = (*it)->GetAddPeriod();
695  punctuation = (*it)->GetSuffix();
696  }
697 
698  // if there was no meaningful note, we clear it
699  if( ! hasSubstantiveNote ) {
700  note.resize( original_length );
701  }
702 }
703 
704 
705 static void s_NoteFinalize(
706  bool addPeriod,
707  string& noteStr,
708  CFlatFeature& flatFeature,
709  ETildeStyle style = eTilde_newline ) {
710 
711  if (!noteStr.empty()) {
712  if (addPeriod && !NStr::EndsWith(noteStr, ".")) {
713 
714  AddPeriod(noteStr);
715  }
716  // Policy change: expand tilde on both descriptors and features
717  ExpandTildes(noteStr, style);
718  TrimSpacesAndJunkFromEnds( noteStr, true );
719 
720  CRef<CFormatQual> note(new CFormatQual("note", noteStr));
721  flatFeature.SetQuals().push_back(note);
722  }
723 }
724 
725 static int s_GetOverlap(const CMappedFeat& feat )
726 {
727  if (feat) {
728  int total_length = 0;
729  ITERATE( CSeq_loc, loc_iter, feat.GetLocation() ) {
730  total_length += loc_iter.GetRange().GetLength();
731  }
732  return total_length;
733  }
734  return 0;
735 }
736 
737 
738 ///
739 /// The best protein feature is defined as the one that has the most overlap
740 /// with the given DNA.
741 /// If there is a tie between two protein features in overlap then the one
742 /// with the lesser processing status is declared the winner.
743 ///
745 {
747  sel.SetLimitTSE(seq.GetTSE_Handle());
748 
749  CMappedFeat best;
751  int best_overlap = 0;
752 
753  for (CFeat_CI it(seq, sel); it; ++it) {
754 
755  if ( !best ) {
756 
757  best = *it;
758  best_processed = it->GetData().GetProt().GetProcessed();
759  best_overlap = s_GetOverlap(best);
760 
761  } else {
762 
763  int current_overlap = s_GetOverlap(*it);
764  CProt_ref::TProcessed current_processed = it->GetData().GetProt().GetProcessed();
765 
766  if ( best_overlap < current_overlap ) {
767 
768  best_overlap = current_overlap;
769  best_processed = current_processed;
770  best = *it;
771 
772  } else if ( (best_overlap == current_overlap) && (best_processed > current_processed) ) {
773 
774  best_processed = current_processed;
775  best = *it;
776  }
777  }
778  }
779  return best;
780 }
781 
782 // -- FeatureHeader
783 
785 {
786  x_GatherInfo(ctx);
787 }
788 
790 {
791  return eItem_FeatHeader;
792 }
793 
795 {
796  if ( ctx.Config().IsFormatFTable() ) {
797  m_Id.Reset(ctx.GetPrimaryId());
798  }
799 }
800 
801 static bool s_CheckFuzz(const CInt_fuzz& fuzz)
802 {
803  return !(fuzz.IsLim() && fuzz.GetLim() == CInt_fuzz::eLim_unk);
804 }
805 
806 static bool s_LocIsFuzz(const CMappedFeat& feat, const CSeq_loc& loc)
807 {
808  if ( feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_imp &&
809  feat.GetData().IsImp() ) { // unmappable impfeats
810  const CSeqFeatData::TImp& imp = feat.GetData().GetImp();
811  if ( imp.IsSetLoc() ) {
812  const string& imploc = imp.GetLoc();
813  if ( imploc.find('<') != NPOS || imploc.find('>') != NPOS ) {
814  return true;
815  }
816  }
817  } else { // any regular feature test location for fuzz
818  for ( CSeq_loc_CI it(loc, CSeq_loc_CI::eEmpty_Allow); it; ++it ) {
819  const CSeq_loc& l = it.GetEmbeddingSeq_loc();
820  switch ( l.Which() ) {
821  case CSeq_loc::e_Pnt:
822  {{
823  if ( l.GetPnt().IsSetFuzz() ) {
824  if ( s_CheckFuzz(l.GetPnt().GetFuzz()) ) {
825  return true;
826  }
827  }
828  break;
829  }}
831  {{
832  if ( l.GetPacked_pnt().IsSetFuzz() ) {
833  if ( s_CheckFuzz(l.GetPacked_pnt().GetFuzz()) ) {
834  return true;
835  }
836  }
837  break;
838  }}
839  case CSeq_loc::e_Int:
840  {{
841  bool fuzz = false;
842  if ( l.GetInt().IsSetFuzz_from() ) {
843  fuzz = s_CheckFuzz(l.GetInt().GetFuzz_from());
844  }
845  if ( !fuzz && l.GetInt().IsSetFuzz_to() ) {
846  fuzz = s_CheckFuzz(l.GetInt().GetFuzz_to());
847  }
848  if ( fuzz ) {
849  return true;
850  }
851  break;
852  }}
854  {{
857  return true;
858  }
859  break;
860  }}
861  case CSeq_loc::e_Null:
862  {{
863  return true;
864  }}
865  default:
866  break;
867  }
868  }
869  }
870 
871  return false;
872 }
873 
/// Appends one "name + value" item to a comma-separated output string.
///
/// Nothing is appended when @a str is empty. Otherwise @a name immediately
/// followed by @a str is added to @a output, preceded by ", " if
/// @a output already has content.
///
/// @param output  In/out: string being built.
/// @param name    Label text placed directly before the value.
/// @param str     Value text; an empty value suppresses the whole item.
static void s_AddPcrPrimersQualsAppend( string &output, const string &name, const string &str )
{
    if (str.empty()) {
        return;
    }

    const string item = name + str;
    if (!output.empty()) {
        output.append(", ");
    }
    output.append(item);
}
883 
884 // This splits a string that's comma-separated with parens at start and end
885 // (or, string might just contain a single string, so no splitting is needed,
886 // in which case the output_vec will be of size 1)
887 static void s_SplitCommaSeparatedStringInParens( vector<string> &output_vec, const string &string_to_split )
888 {
889  // nothing to do since no input
890  if( string_to_split.empty() ) {
891  return;
892  }
893 
894  // no splitting required
895  if( string_to_split[0] != '(' ) {
896  output_vec.push_back( string_to_split );
897  return;
898  }
899 
900  // if ends with closing paren, chop that off.
901  // ( It's actually a data error if we DON'T end with a ')', but we continue anyway, since
902  // we want to do the best we can with the data we get. )
903  size_t amount_to_chop_off_end = 0;
904  if( string_to_split[string_to_split.length() - 1] == ')' ) {
905  amount_to_chop_off_end = 1;
906  }
907 
908  NStr::Split( string_to_split.substr( 1, string_to_split.length() - amount_to_chop_off_end - 1), ",", output_vec, 0 );
909 }
910 
// Text values recognised for the pseudogene qualifier.
// NOTE(review): the lookup container built from this table is declared on
// lines not visible here; if it is a sorted static set, the entries must
// stay in that sort order -- confirm before adding or reordering.
static const char* const sc_ValidPseudoGene[] = {
    "allelic", "processed", "unitary", "unknown", "unprocessed"
};
920 
921 static bool s_IsValidPseudoGene( objects::CFlatFileConfig::TMode mode, const string& text)
922 {
923  switch(mode)
924  {
925  case objects::CFlatFileConfig::eMode_Release:
926  case objects::CFlatFileConfig::eMode_Entrez:
927  return sc_ValidPseudoGeneText.find(text.c_str()) != sc_ValidPseudoGeneText.end();
928  default:
929  return ! text.empty();
930  }
931 }
932 
// Exception texts listed as valid.
// NOTE(review): the lookup container built from this table is declared on
// lines not visible here; the current order looks case-insensitively
// sorted, so presumably it must stay that way -- confirm before editing.
static const char* const sc_ValidExceptionText[] = {
    "annotated by transcript or proteomic data",
    "rearrangement required for product",
    "reasons given in citation",
    "RNA editing",
};
941 
942 static bool s_IsValidExceptionText(const string& text)
943 {
944  return sc_LegalExceptText.find(text.c_str()) != sc_LegalExceptText.end();
945 }
946 
947 
// Exception texts listed as valid for RefSeq records.
// NOTE(review): the lookup container built from this table is declared on
// lines not visible here; if it is a sorted static set, the entries must
// stay in that sort order -- confirm before adding or reordering.
static const char* const sc_ValidRefSeqExceptionText[] = {
    "adjusted for low-quality genome",
    "alternative processing",
    "alternative start codon",
    "artificial frameshift",
    "dicistronic gene",
    "mismatches in transcription",
    "mismatches in translation",
    "modified codon recognition",
    "nonconsensus splice site",
    "transcribed product replaced",
    "transcribed pseudogene",
    "translated product replaced",
    "unclassified transcription discrepancy",
    "unclassified translation discrepancy",
    "unextendable partial coding region",
};
967 
968 static bool s_IsValidRefSeqExceptionText(const string& text)
969 {
970  return sc_LegalRefSeqExceptText.find(text.c_str()) != sc_LegalRefSeqExceptText.end();
971 }
972 
973 // -- FeatureItemBase
974 
976 (const CMappedFeat& feat,
979  const CSeq_loc* loc,
980  bool suppressAccession) :
981  CFlatItem(&ctx), m_Feat(feat), m_Feat_Tree(ftree), m_Loc(loc ? loc :
982  (feat ? &feat.GetLocation() : nullptr)),
983  m_SuppressAccession(suppressAccession)
984 {
985  if (m_Feat) {
987 
989  const CSeq_annot_Handle& ah = feat.GetAnnot();
991  if (! seh) {
992  x_SetExternal();
993  }
994  }
995 }
996 
998 {
1000  *new CFlatSeqLoc(GetLoc(), *GetContext(), CFlatSeqLoc::eType_location, false, false, this->IsSuppressAccession()),
1001  m_Feat));
1002  if ( ff ) {
1003  x_FormatQuals(*ff);
1004  }
1005  return ff;
1006 }
1007 
1008 
1009 // -- CFeatureItem
1010 
1011 string CFeatureItem::GetKey(void) const
1012 {
1014 
1017 
1018  if (GetContext()->IsProt()) { // protein
1019  if ( IsMappedFromProt() && type == CSeqFeatData::e_Prot ) {
1020  if ( subtype == CSeqFeatData::eSubtype_preprotein ||
1025  return "Precursor";
1026  }
1027  }
1028  switch ( subtype ) {
1030  return "Region";
1032  return "Bond";
1034  return "Site";
1035  default:
1036  break;
1037  }
1038  } else { // nucleotide
1039  switch ( subtype ) {
1040 
1042  return "ncRNA";
1043 
1045  return "tmRNA";
1046 
1048  if ( !ctx.IsRefSeq() ) {
1049  return "misc_feature";
1050  }
1051  break;
1052 
1057  return "misc_feature";
1058 
1059  default:
1060  break;
1061  }
1062  }
1063 
1064  // deal with unmappable impfeats
1065  if (subtype == CSeqFeatData::eSubtype_imp && type == CSeqFeatData::e_Imp) {
1066  const CSeqFeatData::TImp& imp = m_Feat.GetData().GetImp();
1067  if ( imp.IsSetKey() ) {
1068  return imp.GetKey();
1069  }
1070  }
1071 
1072  if (type == CSeqFeatData::e_Imp) {
1073  switch ( subtype ) {
1086  return "regulatory";
1087  default:
1088  break;
1089  }
1090  }
1091 
1092  return CFeatureItemBase::GetKey();
1093 }
1094 
1095 
1096 // constructor from CSeq_feat
1098 (const CMappedFeat& feat,
1101  const CSeq_loc* loc,
1102  EMapped mapped,
1103  bool suppressAccession,
1104  CConstRef<CFeatureItem> parentFeatureItem) :
1105  CFeatureItemBase(feat, ctx, ftree, loc, suppressAccession), m_Mapped(mapped)
1106 {
1107  x_GatherInfoWithParent(ctx, parentFeatureItem);
1108 }
1109 
1111 {
1112  return eItem_Feature;
1113 }
1114 
1116 {
1117  if ( s_SkipFeature(GetFeat(), GetLoc(), ctx) ) {
1118  x_SetSkip();
1119  return;
1120  }
1122  x_AddQuals(ctx, parentFeatureItem );
1123 }
1124 
1125 // ----------------------------------------------------------------------------
1127  CBioseqContext& ctx )
1128 //
1129 // Note: /partial has been deprecated since DEC-2001. Current policy is to
1130 // suppress /partial in entrez and release modes and let it stand in gbench and
1131 // dump modes
1132 // ----------------------------------------------------------------------------
1133 {
1134  if ( !ctx.Config().HideUnclassPartial() ) {
1135  if ( !IsMappedFromCDNA() || !ctx.IsProt() ) {
1136  if ( m_Feat.IsSetPartial() && m_Feat.GetPartial() ) {
1137  if ( eSeqlocPartial_Complete == sequence::SeqLocPartialCheck( GetLoc(), &ctx.GetScope() ) &&
1138  !s_LocIsFuzz( m_Feat, GetLoc() ) )
1139  {
1140  x_AddQual( eFQ_partial, new CFlatBoolQVal( true ) );
1141  }
1142  }
1143  }
1144  }
1145 }
1146 
1147 // ----------------------------------------------------------------------------
1150  CSeqFeatData::ESubtype subtype )
1151 // ----------------------------------------------------------------------------
1152 {
1153  if ( subtype == CSeqFeatData::eSubtype_operon ||
1154  subtype == CSeqFeatData::eSubtype_gap ) {
1155  return;
1156  }
1157 
1158  // bail if this type of object is not allowed to carry an operon
1160  return;
1161  }
1162 
1163  const CGene_ref* gene_ref = m_Feat.GetGeneXref();
1164  if (! gene_ref || ! gene_ref->IsSuppressed()) {
1165  const CSeq_loc& operon_loc = ( ctx.IsProt() || !IsMapped() ) ?
1166  m_Feat.GetLocation() : GetLoc();
1167  CConstRef<CSeq_feat> operon
1168  = GetOverlappingOperon( operon_loc, ctx.GetScope() );
1169  if ( operon ) {
1170  const string& operon_name = operon->GetNamedQual( "operon" );
1171  if ( !operon_name.empty() ) {
1172  x_AddQual(eFQ_operon, new CFlatStringQVal(operon_name));
1173  }
1174  }
1175  }
1176 }
1177 
1178 // ----------------------------------------------------------------------------
1181  CSeqFeatData::ESubtype subtype )
1182 // ----------------------------------------------------------------------------
1183 {
1184  _ASSERT( m_Feat.GetData().IsImp() );
1185 
1186  switch ( subtype ) {
1189  break;
1192  break;
1194  x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("CAAT_signal"));
1195  break;
1198  break;
1200  x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("minus_35_signal"));
1201  break;
1203  x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("minus_10_signal"));
1204  break;
1206  x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("GC_signal"));
1207  break;
1209  x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("ribosome_binding_site"));
1210  break;
1212  x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("polyA_signal_sequence"));
1213  break;
1215  x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("attenuator"));
1216  break;
1218  x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("terminator"));
1219  break;
1222  break;
1223  default:
1224  break;
1225  }
1226 }
1227 
1228 // ----------------------------------------------------------------------------
1232  CSeqFeatData::ESubtype subtype,
1233  bool pseudo )
1234 // ----------------------------------------------------------------------------
1235 {
1236  if ( !pseudo ||
1238  subtype == CSeqFeatData::eSubtype_centromere ||
1239  subtype == CSeqFeatData::eSubtype_telomere )
1240  {
1241  return;
1242  }
1243 
1244  if (ctx.Config().DropIllegalQuals() &&
1246  {
1247  switch (subtype) {
1278  return;
1279  default:
1280  break;
1281  }
1282  }
1283  x_AddQual( eFQ_pseudo, new CFlatBoolQVal( true ) );
1284 }
1285 
1286 // ----------------------------------------------------------------------------
//
//  Turns comment text into note qualifiers:
//   - the feature's own comment, after trimming, period handling and quote
//     conversion, is added as eFQ_seqfeat_note unless it is empty, "~", or
//     equal to the comment of the product's best protein feature;
//   - when ctx.ShowAnnotCommentAsCOMMENT() is false, further comment entries
//     are examined as well.
//  NOTE(review): the signature line, the loop header(s) that provide `it`, and
//  the start of the statement consuming `comm` are not visible in this listing.
1288 // ----------------------------------------------------------------------------
1289 {
1290  string precursor_comment;
1291  // set precursor_comment, if needed.
1292  // It's set from the feature's product's best protein's comment
1293  if( GetContext()->IsProt() && IsMappedFromProt() && m_Feat.IsSetProduct() ) {
1294  const CSeq_id* prod_id = m_Feat.GetProduct().GetId();
1295  if (prod_id) {
1296  CBioseq_Handle prod_bioseq = GetContext()->GetScope().GetBioseqHandle(*prod_id);
1297  if( prod_bioseq ) {
1298  CMappedFeat best_prot_feat = s_GetBestProtFeature( prod_bioseq );
1299  if( best_prot_feat && best_prot_feat.IsSetComment() ) {
1300  precursor_comment = best_prot_feat.GetComment() ;
1301  }
1302  }
1303  }
1304  }
1305 
1306  if (m_Feat.IsSetComment()) {
  // work on a copy; the feature's stored comment is left untouched
1307  string comment = m_Feat.GetComment();
1308 
1309  TrimSpacesAndJunkFromEnds( comment, true );
  // skip comments that are empty, a lone "~", or a repeat of the precursor comment
1310  if ( ! comment.empty() && comment != "~" && comment != precursor_comment) {
  // remember whether a trailing period was removed so it can be requested again below
1311  bool bAddPeriod = RemovePeriodFromEnd( comment, true );
1312  ConvertQuotes(comment);
1313  CRef<CFlatStringQVal> seqfeat_note( new CFlatStringQVal( comment ) );
1314 // if ( bAddPeriod && ! x_GetStringQual(eFQ_prot_desc ) ) {
1315  // careful! Period must be removed if we have a valid eFQ_prot_desc
1316  // Examples to test some cases: AB001488, M96268
1317  if ( bAddPeriod ) {
1318  seqfeat_note->SetAddPeriod();
1319  }
1320  x_AddQual( eFQ_seqfeat_note, seqfeat_note );
1321  }
1322  }
1323 
1324  /// also scan the annot to see if there is a comment there, if required
1325  if( ! ctx.ShowAnnotCommentAsCOMMENT() ) {
1329  if ((*it)->IsComment()) {
1330  const string & comment = (*it)->GetComment();
1331  // certain comments require special handling
1332  const static string ktRNAscanSE = "tRNA features were annotated by tRNAscan-SE";
  // the prefix comparison is case-insensitive (NStr::eNocase)
1333  if( NStr::StartsWith(comment, ktRNAscanSE, NStr::eNocase) /* && ! x_HasMethodtRNAscanSE() */ )
1334  {
1336  // don't propagate tRNAscan-SE comments to irrelevant features
1337  continue;
1338  }
1339  }
  // clean up a private copy: trim the ends and drop a trailing period
1340  string comm = comment;
1341  TrimSpacesAndJunkFromEnds( comm, false );
1342  RemovePeriodFromEnd( comm, true );
1344  new CFlatStringQVal(comm));
1345  }
1346  }
1347  }
1348  }
1349 
1350 }
1351 
1352 // ----------------------------------------------------------------------------
1354  CBioseqContext& ctx )
//
//  Handles the feature's exp-ev setting. Returns immediately when the feature
//  has none; otherwise consults the GenBank qualifiers "experiment" and
//  "inference" through x_GetGbValue().
//  NOTE(review): the test that selects between the two branches, and the
//  statements executed when those qualifiers are absent, are not visible in
//  this listing.
1355 // ----------------------------------------------------------------------------
1356 {
1357  if ( ! m_Feat.IsSetExp_ev() ) {
1358  return;
1359  }
1360 
1361  string value;
  // first branch: act only when neither "experiment" nor "inference" is present
1363  if ( ! x_GetGbValue( "experiment", value ) && ! x_GetGbValue( "inference", value ) ) {
1365  }
1366  }
  // second branch: act only when "inference" is absent
1367  else if ( ! x_GetGbValue( "inference", value ) ) {
1369  }
1370 }
1371 
// Subtype filter: returns true for the feature subtypes listed in the switch
// and false for every other subtype.
// NOTE(review): the function-name line and the case labels are not visible in
// this listing, so the accepted subtypes cannot be read off here.
1372 static
1374  const CSeqFeatData& data )
1375 {
1376  switch( data.GetSubtype() ) {
1389  return true;
1390  default:
1391  return false;
1392  }
1393 }
1394 
1395 // ----------------------------------------------------------------------------
1397  CBioseqContext& ctx )
1398 //
1399 // Add any existing exception qualifiers.
1400 // Note: These include /ribosomal_slippage and /trans-splicing as special
1401 // cases. Also, some exceptions are listed as notes.
//
//  The exception text is split on commas; each non-empty, space-trimmed entry
//  is routed to a dedicated qualifier, to the exception list, or to the note
//  list. The two lists are each joined with ", " at the end.
//  NOTE(review): several x_AddQual lines (ribosomal slippage, artificial
//  location, the cds/mRNA branches and the final note) are not visible in this
//  listing.
1402 // ----------------------------------------------------------------------------
1403 {
1404  const CSeqFeatData& data = m_Feat.GetData();
1405 
1406  string raw_exception;
1407 
  // the text is used only when the except flag is set and true AND the text is non-empty
1408  if ( ( m_Feat.IsSetExcept() && m_Feat.GetExcept() ) &&
1409  (m_Feat.IsSetExcept_text() && !m_Feat.GetExcept_text().empty()) ) {
1410  raw_exception = m_Feat.GetExcept_text();
1411  }
1412  if ( raw_exception == "" ) {
1413  return;
1414  }
1415 
1416  const bool bIsRefseq = ctx.IsRefSeq();
1417  // const bool bIsRelaxed = ( ! cfg.DropIllegalQuals() );
  // "relaxed" here means: neither release mode nor Entrez mode
1418  const bool bIsRelaxed = ((! ctx.Config().IsModeRelease()) && (! ctx.Config().IsModeEntrez()));
1419 
1420  list<string> exceptions;
1421  NStr::Split( raw_exception, ",", exceptions, NStr::fSplit_Tokenize );
1422 
1423  list<string> output_exceptions;
1424  list<string> output_notes;
1425  ITERATE( list<string>, it, exceptions ) {
1426  string cur = NStr::TruncateSpaces( *it );
1427  if( cur.empty() ) {
1428  continue;
1429  }
1430 
1431  //
1432  // If exceptions are legal then it depends on the exception. Some are
1433  // turned into their own custom qualifiers. Others are allowed to stand
1434  // as exceptions, while others are turned into notes.
1435  //
  // generally valid text: kept as exception for RefSeq, relaxed mode, or coding regions
1436  if ( s_IsValidExceptionText( cur ) ) {
1437  if( bIsRefseq || bIsRelaxed || data.IsCdregion() ) {
1438  output_exceptions.push_back( cur );
1439  } else {
1440  output_notes.push_back( cur );
1441  }
1442  continue;
1443  }
  // RefSeq-only valid text: kept as exception for RefSeq or relaxed mode only
1444  if ( s_IsValidRefSeqExceptionText( cur ) ) {
1445  if( bIsRefseq || bIsRelaxed ) {
1446  output_exceptions.push_back( cur );
1447  } else {
1448  output_notes.push_back( cur );
1449  }
1450  continue;
1451  }
  // the remaining special texts are matched case-insensitively
1452  if ( NStr::EqualNocase(cur, "ribosomal slippage") ) {
1453  if( data.IsCdregion() ) {
1455  } else {
1456  output_notes.push_back( cur );
1457  }
1458  continue;
1459  }
1460  if ( NStr::EqualNocase(cur, "trans-splicing") ) {
1461  if( s_TransSplicingFeatureAllowed( data ) ) {
1462  x_AddQual( eFQ_trans_splicing, new CFlatBoolQVal( true ) );
1463  } else {
1464  output_notes.push_back( cur );
1465  }
1466  continue;
1467  }
1468  if ( NStr::EqualNocase(cur, "circular RNA") ) {
1469  if( data.IsRna() || data.IsCdregion() ) {
1470  x_AddQual( eFQ_circular_RNA, new CFlatBoolQVal( true ) );
1471  } else {
1472  output_notes.push_back( cur );
1473  }
1474  continue;
1475  }
1476  const bool is_cds_or_mrna = ( data.IsCdregion() ||
1478  if( NStr::EqualNocase(cur, "artificial location") ) {
1479  if( is_cds_or_mrna ) {
1481  } else {
1482  output_notes.push_back( cur );
1483  }
1484  continue;
1485  }
1486  if( NStr::EqualNocase(cur, "heterogeneous population sequenced") ||
1487  NStr::EqualNocase(cur, "low-quality sequence region") )
1488  {
1489  if( is_cds_or_mrna ) {
1491  } else {
1492  output_notes.push_back( cur );
1493  }
1494  continue;
1495  }
  // fallback for unrecognized text: exception in relaxed mode, note otherwise
1496  else {
1497  if ( bIsRelaxed ) {
1498  output_exceptions.push_back( cur );
1499  }
1500  else {
1501  output_notes.push_back( cur );
1502  }
1503  }
1504  }
1505  if ( ! output_exceptions.empty() ) {
1506  string exception = NStr::Join( output_exceptions, ", " );
1507  x_AddQual(eFQ_exception, new CFlatStringQVal( exception ) );
1508  }
1509  if ( ! output_notes.empty() ) {
1510  string note = NStr::Join( output_notes, ", " );
1512  }
1513 }
1514 
1515 // ----------------------------------------------------------------------------
1517  CConstRef<CSeq_feat> gene_feat )
//
//  Uses the comment of the associated gene feature. Does nothing when no gene
//  feature is given or when it has no comment set.
//  NOTE(review): the start of the statement that consumes the comment is not
//  visible in this listing.
1518 // ----------------------------------------------------------------------------
1519 {
1520  if ( ! gene_feat || ! gene_feat->IsSetComment() ) {
1521  return;
1522  }
1524  gene_feat->GetComment() ) );
1525 }
1526 
1527 // ----------------------------------------------------------------------------
1529  const CGene_ref* gene_ref,
1530  const CConstRef<CSeq_feat>& gene_feat )
//
//  Adds eFQ_gene_xref when no gene_ref was supplied but a gene feature is
//  available: the db list of that feature's Gene-ref is preferred, and the
//  gene feature's own dbxrefs are the fallback.
//  NOTE(review): an enclosing condition (matching the extra closing brace
//  below) is not visible in this listing.
1531 // ----------------------------------------------------------------------------
1532 {
1533  const CSeqFeatData& data = m_Feat.GetData();
1535 
1537  if ( ! gene_ref && gene_feat ) {
  // gene_ref is a by-value pointer parameter; reassigning it is local to this call
1538  gene_ref = &gene_feat->GetData().GetGene();
1539  if (gene_ref && gene_ref->IsSetDb()) {
1540  x_AddQual(
1541  eFQ_gene_xref, new CFlatXrefQVal( gene_ref->GetDb() ) );
1542  } else if ( gene_feat->IsSetDbxref() ) {
1543  x_AddQual(
1544  eFQ_gene_xref, new CFlatXrefQVal( gene_feat->GetDbxref() ) );
1545  }
1546  }
1547  }
1548 }
1549 
1550 // ----------------------------------------------------------------------------
1552  const CBioseqContext& ctx,
1553  CConstRef<CSeq_feat> gene_feat )
1554 //
1555 // For non-gene features, add /old_locus_tag, if one exists somewhere.
//
//  Scans the qualifiers of the associated gene feature for entries named
//  "old_locus_tag". Nothing is done without a gene feature, nor in a protein
//  context unless this feature is a gene or a coding region.
//  NOTE(review): the statement executed for a matching qualifier is not
//  visible in this listing.
1556 // ----------------------------------------------------------------------------
1557 {
1558  if ( ! gene_feat ) {
1559  return;
1560  }
1561 
1562  if ( ctx.IsProt() ) {
1563  // skip if GenPept format and not gene or CDS
1564  const CSeqFeatData& data = m_Feat.GetData();
1565  CSeqFeatData::ESubtype subtype = data.GetSubtype();
1566  if (subtype != CSeqFeatData::eSubtype_gene && subtype != CSeqFeatData::eSubtype_cdregion) {
1567  return;
1568  }
1569  }
1570 
1571  const CSeq_feat::TQual& quals = gene_feat->GetQual();
1572  for ( size_t iPos = 0; iPos < quals.size(); ++iPos ) {
1573  CRef< CGb_qual > qual = quals[ iPos ];
  // ignore qualifiers lacking either a name or a value
1574  if ( ! qual->IsSetQual() || ! qual->IsSetVal() ) {
1575  continue;
1576  }
  // the name comparison is exact (case-sensitive operator==)
1577  if ( qual->GetQual() == "old_locus_tag" ) {
1580  }
1581  }
1582 }
1583 
1584 // ----------------------------------------------------------------------------
1586  const CGene_ref* gene_ref,
1587  const CSeq_feat* gene_feat ) const
//
//  Decides whether this feature is to be treated as pseudo.
//  Returns true when
//   - the feature is not a gene, operon or gap and either the gene feature or
//     the supplied gene_ref is flagged pseudo, or
//   - the feature is a gene whose Gene-ref is flagged pseudo, or
//   - the feature is an RNA whose RNA-ref is flagged pseudo.
//  Otherwise returns the feature's own pseudo flag (false when unset).
//  NOTE(review): the declaration of `type` is not visible in this listing.
1588 // ----------------------------------------------------------------------------
1589 {
1590  const CSeqFeatData& data = m_Feat.GetData();
1592  CSeqFeatData::ESubtype subtype = data.GetSubtype();
1593 
1594  bool pseudo = m_Feat.IsSetPseudo() ? m_Feat.GetPseudo() : false;
1595  if ( type != CSeqFeatData::e_Gene &&
1596  subtype != CSeqFeatData::eSubtype_operon &&
1597  subtype != CSeqFeatData::eSubtype_gap )
1598  {
1599  if ( gene_feat && gene_feat->IsSetPseudo() && gene_feat->GetPseudo() ) {
1600  return true;
  // NOTE(review): everything from here to the end of this if-body follows an
  // unconditional return and can never execute, so the gene feature's own
  // Gene-ref pseudo flag (altref) is never consulted. Confirm whether that
  // check was meant to run when gene_feat itself is not flagged pseudo.
1601  const CGene_ref* altref = &gene_feat->GetData().GetGene();
1602  if ( altref && altref->IsSetPseudo() && altref->GetPseudo() ) {
1603  return true;
1604  }
1605  }
1606  if ( gene_ref && gene_ref->IsSetPseudo() && gene_ref->GetPseudo() ) {
1607  return true;
1608  }
1609  }
1610  if ( type == CSeqFeatData::e_Gene ) {
1611  if ( data.GetGene().IsSetPseudo() && data.GetGene().GetPseudo() ) {
1612  return true;
1613  }
1614  }
1615  if ( type == CSeqFeatData::e_Rna ) {
1616  if ( data.GetRna().IsSetPseudo() && data.GetRna().GetPseudo() ) {
1617  return true;
1618  }
1619  }
1620  return pseudo;
1621 }
1622 
1625  CConstRef<CFeatureItem> parentFeatureItem )
//
//  Qualifier collection that relies on the Seq-entry index (CSeqEntryIndex /
//  CBioseqIndex) to find the associated gene. Outline:
//   1. fetch the index objects; return silently if either is unavailable;
//   2. note whether any Seq-id of the Bioseq is EMBL, DDBJ, TPE or TPD;
//   3. resolve gene_ref / gene_feat from the gene xref, the index, or the
//      parent feature;
//   4. compute the pseudo flag;
//   5. add type-specific qualifiers, then the common ones, then clean up.
//  NOTE(review): the signature's first lines and a number of interior lines
//  (declarations, conditions, case labels, some calls) are not visible in this
//  listing; comments below describe only what is shown.
1626 {
1627  CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
1628  if (! idx) return;
1629  CBioseq_Handle hdl = ctx.GetHandle();
1630  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
1631  if (! bsx) return;
1632 
1633  const CSeqFeatData& data = m_Feat.GetData();
1635  CSeqFeatData::ESubtype subtype = data.GetSubtype();
1636 
1637  bool is_not_genbank = false;
1638  {{
1639  ITERATE( CBioseq::TId, id_iter, ctx.GetBioseqIds() ) {
1640  const CSeq_id& id = **id_iter;
1641 
1642  switch ( id.Which() ) {
1643  case CSeq_id_Base::e_Embl:
1644  case CSeq_id_Base::e_Ddbj:
1645  case CSeq_id_Base::e_Tpe:
1646  case CSeq_id_Base::e_Tpd:
1647  is_not_genbank = true;
  // this break leaves the switch only; the remaining ids are still visited
1648  break;
1649  default:
1650  // do nothing
1651  break;
1652  }
1653  }
1654  }}
1655 
1656  const CGene_ref* gene_ref = nullptr;
1657  CConstRef<CSeq_feat> gene_feat;
1658  const CGene_ref* feat_gene_xref = nullptr;
  // the feature's own gene xref wins; the parent's xref is the fallback
1659  feat_gene_xref = m_Feat.GetGeneXref();
1660  if (! feat_gene_xref && parentFeatureItem) {
1661  feat_gene_xref = parentFeatureItem->GetFeat().GetGeneXref();
1662  }
1663  bool suppressed = false;
1664 
1665  const bool gene_forbidden_if_genbank =
1667  subtype == CSeqFeatData::eSubtype_centromere ||
1668  subtype == CSeqFeatData::eSubtype_telomere );
1669 
  // gene features get no gene lookup at all (empty branch)
1670  if ( type == CSeqFeatData::e_Gene ) {
1671  } else if (subtype != CSeqFeatData::eSubtype_operon &&
1672  subtype != CSeqFeatData::eSubtype_gap &&
1673  (is_not_genbank || ! gene_forbidden_if_genbank)) {
1674  if (feat_gene_xref) {
1675  if (feat_gene_xref->IsSuppressed()) {
1676  suppressed = true;
1677  }
1678  }
1679 
1680  if (feat_gene_xref && ! suppressed) {
1681  // RW-943
1682  // gene_ref = feat_gene_xref;
1684  if (! ft) {
1685  if (parentFeatureItem) {
1686  // RW-985 fix for RW-943 dropping xrefs on sig_peptide and mat_peptide
1687  ft = bsx->GetFeatIndex (parentFeatureItem->GetFeat());
1688  } else {
1689  // SF-3276 BAM94483 coded_by CDS was not getting xref'd gene
1690  ft = bsx->GetFeatureForProduct();
1691  }
1692  }
1693  if (ft) {
1694  CRef<CFeatureIndex> fsx = ft->GetBestGene();
1695  if (fsx) {
1696  const CMappedFeat mf = fsx->GetMappedFeat();
1697  if (mf) {
1698  const CGene_ref* gr = nullptr;
1700  gf = &(mf.GetMappedFeature());
1701  gr = &(mf.GetData().GetGene());
1702  if (gr) {
  // the indexed gene is adopted only if its locus_tag (or, failing that,
  // its locus) equals the xref's; otherwise the xref itself is used
1703  if (feat_gene_xref->IsSetLocus_tag() && gr->IsSetLocus_tag()) {
1704  if (feat_gene_xref->GetLocus_tag() == gr->GetLocus_tag()) {
1705  gene_feat = &(mf.GetMappedFeature());
1706  gene_ref = &(mf.GetData().GetGene());
1707  } else {
1708  // RW-985
1709  gene_ref = feat_gene_xref;
1710  }
1711  } else if (feat_gene_xref->IsSetLocus() && gr->IsSetLocus()) {
1712  if (feat_gene_xref->GetLocus() == gr->GetLocus()) {
1713  gene_feat = &(mf.GetMappedFeature());
1714  gene_ref = &(mf.GetData().GetGene());
1715  } else {
1716  // RW-985
1717  gene_ref = feat_gene_xref;
1718  }
1719  } else {
1720  // SF-3822 - map locus in xref to desc in gene
1721  gene_ref = feat_gene_xref;
1722  }
1723  }
1724  }
1725  } else {
1726  // RW-943
1727  gene_ref = feat_gene_xref;
1728  }
1729  } else if ( feat_gene_xref && (! suppressed) && subtype == CSeqFeatData::eSubtype_cdregion ) {
1730  // CAI12201 coded_by CDS on far embl record
1731  gene_ref = feat_gene_xref;
1732  }
1733  } else if ((! feat_gene_xref || ! suppressed) &&
1736  bool is_mapped = false;
1737  if (parentFeatureItem) {
1738  ft = bsx->GetFeatIndex (parentFeatureItem->GetFeat());
1739  if (ft) {
1740  if (subtype == CSeqFeatData::eSubtype_preprotein ||
  // lookup order inside the try: own gene xref, then the parent's best gene,
  // then a gene found via the parent feature handle; any CException raised
  // here is swallowed on purpose (empty handler below)
1745  try {
1746  if ( m_Feat.IsSetXref() ) {
1747  feat_gene_xref = m_Feat.GetGeneXref();
1748  if ( feat_gene_xref ) {
1749  gene_ref = feat_gene_xref;
1750  is_mapped = true;
1751  }
1752  }
1753  if (! is_mapped) {
1754  CRef<CFeatureIndex> fsx = ft->GetBestGene();
1755  if (fsx) {
1756  const CMappedFeat mf = fsx->GetMappedFeat();
1757  if (mf) {
1758  gene_feat = &(mf.GetMappedFeature());
1759  gene_ref = &(mf.GetData().GetGene());
1760  is_mapped = true;
1761  }
1762  }
1763  }
1764  if (! is_mapped) {
1765  // e.g., check sig_peptide for gene overlapping parent CDS
1766  CSeq_feat_Handle parent_feat_handle;
1767  parent_feat_handle = parentFeatureItem->GetFeat();
1769  gene_feat, parent_feat_handle );
1770  is_mapped = true;
1771  }
1772  } catch (CException&) {}
1773  }
1774  }
1775  } else {
1776  ft = bsx->GetFeatIndex (m_Feat);
1777  if (! ft) {
1778  ft = bsx->GetFeatureForProduct();
1779  if (! ft) {
1780  // RW-1646
  // NOTE(review): these two locals shadow the `hdl` and `bsx` declared at
  // the top of the function; the inner `bsx` is not used in the lines shown
1781  CBioseq_Handle hdl = ctx.GetHandle();
1782  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
1783  const CRef<CSeqMasterIndex>& midx = idx->GetMasterIndex();
1784  CRef<feature::CFeatTree> ftree = midx->GetFeatTree();
1785  ftree->AddGenesForFeat(m_Feat, ctx.GetAnnotSelector());
1786  try {
1787  const CMappedFeat mf = ftree->GetBestGene(m_Feat);
1788  if (mf) {
1789  gene_feat = &(mf.GetMappedFeature());
1790  gene_ref = &(mf.GetData().GetGene());
1791  }
1792  } catch (CException&) {}
1793  }
1794  }
1795  }
1796  if (ft && (! is_mapped)) {
1797  CRef<CFeatureIndex> fsx = ft->GetBestGene();
1798  if (fsx) {
1799  const CMappedFeat mf = fsx->GetMappedFeat();
1800  if (mf) {
1801  gene_feat = &(mf.GetMappedFeature());
1802  gene_ref = &(mf.GetData().GetGene());
1803  }
1804  } else if (feat_gene_xref) {
1805  // last resort, e.g., MH013512 after first nuc-prot set
1806  gene_ref = feat_gene_xref;
1807  }
1808  }
1809  }
1810  }
1811 
1812  bool pseudo = x_GetPseudo(gene_ref, gene_feat );
  // NOTE(review): the condition guarding the first reset below is not visible
1813  if ( ctx.IsEMBL() || ctx.IsDDBJ() ) {
1815  pseudo = false;
1816  }
1817  if ( type == CSeqFeatData::e_Prot ) {
1818  pseudo = false;
1819  }
1820  }
1821 
1822  //
1823  // Collect qualifiers that are specific to a single or just a few feature
1824  // types:
1825  //
1826  switch ( type ) {
1828  x_AddQualsCdregionIdx(m_Feat, ctx, pseudo);
1829  break;
1830  case CSeqFeatData::e_Rna:
1831  x_AddQualsRna(m_Feat, ctx, pseudo);
1832  break;
1833  case CSeqFeatData::e_Prot:
1834  x_AddQualsProt(ctx, pseudo);
1835  break;
1837  x_AddQualsRegion( ctx );
1838  break;
1839  case CSeqFeatData::e_Site:
1840  x_AddQualsSite( ctx );
1841  break;
1842  case CSeqFeatData::e_Bond:
1843  x_AddQualsBond( ctx );
1844  break;
1847  break;
1849  x_AddQualsNonStd( ctx );
1850  break;
1851  case CSeqFeatData::e_Het:
1852  x_AddQualsHet( ctx );
1853  break;
1856  break;
1857  default:
1858  break;
1859  }
1860 
1861  //
1862  // Collect qualifiers that are common to most feature types:
1863  //
1864  x_AddQualPartial( ctx );
1865  x_AddQualDbXref( ctx );
1866  x_AddQualExt();
1867  x_AddQualExpInv( ctx );
1870  x_AddQualNote( gene_feat );
1871  x_AddQualOldLocusTag( ctx, gene_feat );
1872  x_AddQualDb( gene_ref );
1873  x_AddQualGeneXref( gene_ref, gene_feat );
  // operon qualifiers are attempted only when the Bioseq index reports an operon
1874  if (bsx->HasOperon()) {
1875  x_AddQualOperon( ctx, subtype );
1876  }
  // last argument is true only when there is a gene feature but no gene_ref
1877  x_AddQualsGene( ctx, gene_ref, gene_feat, gene_ref ? false : gene_feat.NotEmpty() );
1878 
1879  x_AddQualPseudo( ctx, type, subtype, pseudo );
1880  x_AddQualsGb( ctx );
1881 
1882  // dynamic mapping of old features to regulatory with regulatory_class qualifier
1883  if ( type == CSeqFeatData::e_Imp ) {
1884  x_AddQualsRegulatoryClass ( ctx, subtype );
1885  }
1886 
1888 
1889  // cleanup (drop illegal quals, duplicate information etc.)
1890  x_CleanQuals( gene_ref );
1891 
1892 
1893 }
1894 
1895 // ----------------------------------------------------------------------------
1898  CConstRef<CFeatureItem> parentFeatureItem )
1899 //
1900 // Add the various qualifiers to this feature. Top level function.
//
//  Dispatch: feature-table format is delegated to x_AddFTableQuals(), and
//  contexts using the Seq-entry index to x_AddQualsIdx(); both return early.
//  Otherwise the gene is resolved through the feature trees, the pseudo flag
//  is computed, and type-specific then common qualifiers are added before the
//  final cleanup.
//  NOTE(review): the signature's first lines and several interior lines
//  (the `type` declaration, conditions, case labels, some calls) are not
//  visible in this listing.
1901 // ----------------------------------------------------------------------------
1902 {
1903 // /**fl**/
1904  // leaving this here since it's so useful for debugging purposes.
1905  //21822,22172
1906  /* if(
1907  (GetLoc().GetStart(eExtreme_Biological) == 21821 &&
1908  GetLoc().GetStop(eExtreme_Biological) == 22171) ||
1909  (GetLoc().GetStop(eExtreme_Biological) == 21821 &&
1910  GetLoc().GetStart(eExtreme_Biological) == 22171)
1911  ) {
1912  cerr << ""; // a do-nothing statement in case we forget to comment it out
1913  } */
1914 // /**fl**/
1915 
1916  if ( ctx.Config().IsFormatFTable() ) {
1917  x_AddFTableQuals( ctx );
1918  return;
1919  }
1920 
1921  if ( ctx.UsingSeqEntryIndex() ) {
1922  x_AddQualsIdx(ctx, parentFeatureItem);
1923  return;
1924  }
1925 
1926  // SQD-4444 : pass annot selector from the context structure
1927  m_Feat_Tree->AddGenesForFeat(m_Feat, ctx.GetAnnotSelector());
1928 
1929  //
1930  // Collect/Compute data that will be shared between several qualifier
1931  // collectors:
1932  //
1933  const CSeqFeatData& data = m_Feat.GetData();
1935  CSeqFeatData::ESubtype subtype = data.GetSubtype();
1936 // /**fl**/>>
1937 // if ( subtype == CSeqFeatData::eSubtype_sig_peptide_aa ||
1938 // subtype == CSeqFeatData::eSubtype_sig_peptide )
1939 // {
1940 // cerr << "Break" << endl;
1941 // }
1942 // <</**fl**/
1943 
1944  // check if this is some kind of Genbank record (some of the logic may be a little different in that case)
1945  bool is_not_genbank = false;
1946  {{
1947  ITERATE( CBioseq::TId, id_iter, ctx.GetBioseqIds() ) {
1948  const CSeq_id& id = **id_iter;
1949 
1950  switch ( id.Which() ) {
1951  case CSeq_id_Base::e_Embl:
1952  case CSeq_id_Base::e_Ddbj:
1953  case CSeq_id_Base::e_Tpe:
1954  case CSeq_id_Base::e_Tpd:
1955  is_not_genbank = true;
  // this break leaves the switch only; the remaining ids are still visited
1956  break;
1957  default:
1958  // do nothing
1959  break;
1960  }
1961  }
1962  }}
1963 
1964  const CGene_ref* gene_ref = nullptr;
1965  CConstRef<CSeq_feat> gene_feat;
1966  const CGene_ref* feat_gene_xref = m_Feat.GetGeneXref();
1967  bool suppressed = false;
1968 
1969  const bool gene_forbidden_if_genbank =
1971  subtype == CSeqFeatData::eSubtype_centromere ||
1972  subtype == CSeqFeatData::eSubtype_telomere );
1973 
  // gene features get no gene lookup at all (empty branch)
1974  if ( type == CSeqFeatData::e_Gene ) {
1975  } else if (subtype != CSeqFeatData::eSubtype_operon &&
1976  subtype != CSeqFeatData::eSubtype_gap &&
1977  (is_not_genbank || ! gene_forbidden_if_genbank)) {
1978  if (feat_gene_xref) {
1979  if (feat_gene_xref->IsSuppressed()) {
1980  suppressed = true;
1981  }
1982  }
  // a non-suppressed xref that ResolveGeneXref() cannot resolve is used directly
1983  if (feat_gene_xref && ! suppressed &&
1984  ! CGeneFinder::ResolveGeneXref(feat_gene_xref, ctx.GetTopLevelEntry())) {
1985  gene_ref = feat_gene_xref;
1986  } else if ((! feat_gene_xref || ! suppressed) &&
1988 
  // three attempts in order, each swallowing CException: the context's
  // feature tree, this item's own tree, then the parent feature
1989  bool is_mapped = false;
1990  try {
1991  CMappedFeat mapped_gene = ctx.GetFeatTree().GetBestGene(m_Feat);
1992  if (mapped_gene) {
1993  gene_feat = mapped_gene.GetOriginalSeq_feat();
1994  gene_ref = &gene_feat->GetData().GetGene();
1995  is_mapped = true;
1996  }
1997  } catch (CException&) {}
1998  if (! is_mapped) {
1999  try {
2000  CMappedFeat mapped_gene = m_Feat_Tree->GetBestGene(m_Feat);
2001  if (mapped_gene) {
2002  gene_feat = mapped_gene.GetOriginalSeq_feat();
2003  gene_ref = &gene_feat->GetData().GetGene();
2004  is_mapped = true;
2005  }
2006  } catch (CException&) {}
2007  }
2008  if (! is_mapped) {
2009  try {
2010  // e.g., check sig_peptide for gene overlapping parent CDS
2011  CSeq_feat_Handle parent_feat_handle;
2012  if( parentFeatureItem ) {
2013  parent_feat_handle = parentFeatureItem->GetFeat();
2015  gene_feat, parent_feat_handle );
2016  }
2017  } catch (CException&) {}
2018  }
2019  }
2020  }
2021 
2022  bool pseudo = x_GetPseudo(gene_ref, gene_feat );
2023 
2024  //
2025  // Collect qualifiers that are specific to a single or just a few feature
2026  // types:
2027  //
2028  switch ( type ) {
2030  x_AddQualsCdregion(m_Feat, ctx, pseudo);
2031  break;
2032  case CSeqFeatData::e_Rna:
2033  x_AddQualsRna(m_Feat, ctx, pseudo);
2034  break;
2035  case CSeqFeatData::e_Prot:
2036  x_AddQualsProt(ctx, pseudo);
2037  break;
2039  x_AddQualsRegion( ctx );
2040  break;
2041  case CSeqFeatData::e_Site:
2042  x_AddQualsSite( ctx );
2043  break;
2044  case CSeqFeatData::e_Bond:
2045  x_AddQualsBond( ctx );
2046  break;
2049  break;
2051  x_AddQualsNonStd( ctx );
2052  break;
2053  case CSeqFeatData::e_Het:
2054  x_AddQualsHet( ctx );
2055  break;
2058  break;
2059  default:
2060  break;
2061  }
2062 
2063  //
2064  // Collect qualifiers that are common to most feature types:
2065  //
2066  x_AddQualPartial( ctx );
2067  x_AddQualDbXref( ctx );
2068  x_AddQualExt();
2069  x_AddQualExpInv( ctx );
2072  x_AddQualNote( gene_feat );
2073  x_AddQualOldLocusTag( ctx, gene_feat );
2074  x_AddQualDb( gene_ref );
2075  x_AddQualGeneXref( gene_ref, gene_feat );
2076  x_AddQualOperon( ctx, subtype );
  // last argument is true only when there is a gene feature but no gene_ref
2077  x_AddQualsGene( ctx, gene_ref, gene_feat, gene_ref ? false : gene_feat.NotEmpty() );
2078 
2079  x_AddQualPseudo( ctx, type, subtype, pseudo );
2080  x_AddQualsGb( ctx );
2081 
2082  // dynamic mapping of old features to regulatory with regulatory_class qualifier
2083  if ( type == CSeqFeatData::e_Imp ) {
2084  x_AddQualsRegulatoryClass ( ctx, subtype );
2085  }
2086 
2088 
2089  // cleanup (drop illegal quals, duplicate information etc.)
2090  x_CleanQuals( gene_ref );
2091 }
2092 
2093 
// Display names for tRNA products, looked up positionally by s_AaName():
// for any code other than '*' the slot is (aa - 64), so 'A' (65) selects
// slot 1 "tRNA-Ala", 'B' slot 2 "tRNA-Asx", ... 'Z' slot 26 "tRNA-Glx";
// '*' selects slot 27 "tRNA-TERM". Slot 0 ("tRNA-Gap") is never returned by
// s_AaName(), which only accepts slots greater than 0.
// The order of the entries is therefore significant.
2094 static const string s_TrnaList[] = {
2095  "tRNA-Gap",
2096  "tRNA-Ala",
2097  "tRNA-Asx",
2098  "tRNA-Cys",
2099  "tRNA-Asp",
2100  "tRNA-Glu",
2101  "tRNA-Phe",
2102  "tRNA-Gly",
2103  "tRNA-His",
2104  "tRNA-Ile",
2105  "tRNA-Xle",
2106  "tRNA-Lys",
2107  "tRNA-Leu",
2108  "tRNA-Met",
2109  "tRNA-Asn",
2110  "tRNA-Pyl",
2111  "tRNA-Pro",
2112  "tRNA-Gln",
2113  "tRNA-Arg",
2114  "tRNA-Ser",
2115  "tRNA-Thr",
2116  "tRNA-Sec",
2117  "tRNA-Val",
2118  "tRNA-Trp",
2119  "tRNA-OTHER",
2120  "tRNA-Tyr",
2121  "tRNA-Glx",
2122  "tRNA-TERM"
2123 };
2124 
2125 
2126 static const string& s_AaName(int aa)
2127 {
2128  int idx = 255;
2129 
2130  if (aa != '*') {
2131  idx = aa - 64;
2132  } else {
2133  idx = 27;
2134  }
2135  if ( idx > 0 && idx < ArraySize(s_TrnaList) ) {
2136  return s_TrnaList [idx];
2137  }
2138  return kEmptyStr;
2139 }
2140 
2141 
// Converts a single amino-acid code: `n` holds the input residue as a
// one-element vector and the first element of `i` is returned.
// NOTE(review): the statement that fills `i` from `n` is not visible in this
// listing; as shown, `i` would be empty when front() is called.
2142 static int s_ToIupacaa(int aa)
2143 {
2144  vector<char> n(1, static_cast<char>(aa));
2145  vector<char> i;
2147  return i.front();
2148 }
2149 
2150 // ----------------------------------------------------------------------------
2152  const CMappedFeat& feat,
2154  bool pseudo )
//
//  Adds the qualifiers specific to RNA features:
//   - an id qualifier for the product (slot chosen from RefSeq / dump / GBench
//     settings);
//   - for tRNA: optional transcription, product name, anticodon and codons;
//   - for mRNA / rRNA: optional transcription, then the generic handling;
//   - for other RNA types: product name taken from the RNA-ref extension;
//   - finally, class and product taken from an RNA-gen extension.
//  NOTE(review): the signature's first line, the `subtype` declaration, the
//  case labels and the qualifier slots of several x_AddQual calls are not
//  visible in this listing.
2155 // ----------------------------------------------------------------------------
2156 {
2157 
2159  const CRNA_ref& rna = feat.GetData().GetRna();
2160  const CFlatFileConfig& cfg = ctx.Config();
2161  CScope& scope = ctx.GetScope();
2162 
2163  ///
2164  /// always output transcript_id
2165  ///
2166  {{
2167  EFeatureQualifier slot =
2168  (ctx.IsRefSeq() || cfg.IsModeDump() || cfg.IsModeGBench()) ?
2170  try {
2171  if (feat.IsSetProduct()) {
2172  CConstRef<CSeq_id> sip(feat.GetProduct().GetId());
2173  if (sip) {
  // look for the product within the same TSE as the current Bioseq first
2174  CBioseq_Handle prod =
2175  scope.GetBioseqHandleFromTSE(*sip, ctx.GetHandle());
2176  if ( prod ) {
2177  x_AddProductIdQuals(prod, slot);
2178  } else {
2179  string acc;
2180  sip->GetLabel(&acc, CSeq_id::eBoth);
2183  if (besth) {
2184  acc.clear();
2185  besth.GetSeqId()->GetLabel(&acc, CSeq_id::eContent);
2186  }
2187  if( acc.empty() && ! cfg.DropIllegalQuals() ) {
2188  //sure of that? doesn't look right---
2189  x_AddQual(slot, new CFlatStringQVal(
2190  NStr::NumericToString(sip->GetGi()) ) );
2191  }
2192  if (!acc.empty()) {
  // when illegal qualifiers are dropped, only valid accessions are emitted
2193  if ( !cfg.DropIllegalQuals() || IsValidAccession(acc)) {
2194  CRef<CSeq_id> acc_id(new CSeq_id(acc));
2195  x_AddQual(slot, new CFlatSeqIdQVal(*acc_id));
2196  }
2197  /*
2198  if (! (cfg.HideGI() || cfg.IsPolicyFtp() || cfg.IsPolicyGenomes())) {
2199  x_AddQual(eFQ_db_xref, new CFlatSeqIdQVal(*sip, true));
2200  }
2201  */
2202  }
2203  }
2204  }
2205  }
2206  }
  // CObjmgrUtilException is deliberately ignored: the id qualifier is best-effort
2207  catch (CObjmgrUtilException&) {
2208  }
2209  }}
2210 
2211  CRNA_ref::TType rna_type = rna.IsSetType() ?
2212  rna.GetType() : CRNA_ref::eType_unknown;
2213  switch ( rna_type ) {
2214  case CRNA_ref::eType_tRNA:
2215  {
  // transcription is shown only for non-pseudo features and only when the
  // configuration asks for it or the format is GBSeq / INSDSeq
2216  if ( !pseudo && ( cfg.ShowTranscript() || cfg.IsFormatGBSeq() || cfg.IsFormatINSDSeq() ) ) {
2217  CSeqVector vec(feat.GetLocation(), scope);
2219  string transcription;
2220  vec.GetSeqData(0, vec.size(), transcription);
2221  x_AddQual(eFQ_transcription, new CFlatStringQVal(transcription));
2222  }
2223  if (rna.IsSetExt()) {
2224  const CRNA_ref::C_Ext& ext = rna.GetExt();
2225  switch (ext.Which()) {
2227  {
2228  // amino acid could not be parsed into structured form
2229  if (!cfg.DropIllegalQuals()) {
2231  new CFlatStringQVal(ext.GetName()));
2232  } else {
2234  new CFlatStringQVal("tRNA-OTHER"));
2235  }
2236  break;
2237  }
2239  {
2240  const CTrna_ext& trna = ext.GetTRNA();
  // aa stays 0 unless an Ncbieaa residue is set; s_AaName() yields an empty
  // name for 0
2241  int aa = 0;
2242  if ( trna.IsSetAa() && trna.GetAa().IsNcbieaa() ) {
2243  aa = trna.GetAa().GetNcbieaa();
2244  }
2245  if ( cfg.IupacaaOnly() ) {
2246  aa = s_ToIupacaa(aa);
2247  }
2248  const string& aa_str = s_AaName(aa);
2249  string amino_acid_str = aa_str;
2250 
2251  if ( !aa_str.empty() ) {
  // ac_str is only another name for aa_str
2252  const string& ac_str = aa_str;
  // a "product" qualifier may refine Met to fMet / iMet, and Ile to Ile2
2253  if (NStr::CompareNocase (ac_str, "tRNA-Met") == 0) {
2254  for (auto& gbqual : m_Feat.GetQual()) {
2255  if (!gbqual->IsSetQual() || !gbqual->IsSetVal()) continue;
2256  if (NStr::CompareNocase( gbqual->GetQual(), "product") != 0) continue;
2257  if (NStr::CompareNocase (gbqual->GetVal (), "tRNA-fMet") == 0) {
2258  amino_acid_str = "tRNA-fMet";
2259  }
2260  if (NStr::CompareNocase (gbqual->GetVal (), "tRNA-iMet") == 0) {
2261  amino_acid_str = "tRNA-iMet";
2262  }
2263  }
2264  } else if (NStr::CompareNocase (ac_str, "tRNA-Ile") == 0) {
2265  for (auto& gbqual : m_Feat.GetQual()) {
2266  if (!gbqual->IsSetQual() || !gbqual->IsSetVal()) continue;
2267  if (NStr::CompareNocase( gbqual->GetQual(), "product") != 0) continue;
2268  if (NStr::CompareNocase (gbqual->GetVal (), "tRNA-Ile2") == 0) {
2269  amino_acid_str = "tRNA-Ile2";
2270  }
2271  }
2272  }
2273  x_AddQual(eFQ_product, new CFlatStringQVal(amino_acid_str));
2274  if ( trna.IsSetAnticodon() && !ac_str.empty() ) {
  // substr(5) skips the five-character "tRNA-" prefix of the table name
2276  new CFlatAnticodonQVal(trna.GetAnticodon(),
2277  ac_str.substr(5, NPOS)));
2278  }
2279  }
2280  if ( trna.IsSetCodon() ) {
2281  const string& comment =
2283  x_AddQual(eFQ_trna_codons, new CFlatTrnaCodonsQVal(trna, comment));
2284  }
2285  //x_AddQual(eFQ_exception_note, new CFlatStringQVal("tRNA features were annotated by tRNAscan-SE."));
2286  break;
2287  }
2288  default:
2289  break;
2290  } // end of internal switch
2291  }
2292  break;
2293  }
2294  case CRNA_ref::eType_mRNA:
2295  case CRNA_ref::eType_rRNA:
2296  {
2297  if ( !pseudo && ( cfg.ShowTranscript() || cfg.IsFormatGBSeq() || cfg.IsFormatINSDSeq() ) ) {
2298  CSeqVector vec(feat.GetLocation(), scope);
2300  string transcription;
2301  vec.GetSeqData(0, vec.size(), transcription);
2302  x_AddQual(eFQ_transcription, new CFlatStringQVal(transcription));
2303  }
2304  // intentional fall through
2305  }
2306  default:
2307  switch ( subtype ) {
2308 
2310  if ( ! rna.IsSetExt() ) {
2311  break;
2312  }
2313  const CRNA_ref_Base::TExt& ext = rna.GetExt();
2314  if ( ! ext.IsGen() ) {
2315  break;
2316  }
2317  break;
2318  }
2320  if ( ! rna.IsSetExt() ) {
2321  break;
2322  }
2323  const CRNA_ref_Base::TExt& ext = rna.GetExt();
2324  if ( ext.IsGen() && ext.GetGen().IsSetQuals() ) {
2325 
2326  const list< CRef< CRNA_qual > >& quals = ext.GetGen().GetQuals().Get();
2327  list< CRef< CRNA_qual > >::const_iterator it = quals.begin();
  // only the first "tag_peptide" qualifier is used (loop breaks after it)
2328  for ( ; it != quals.end(); ++it ) {
2329  if ( (*it)->IsSetQual() && (*it)->IsSetVal() ) {
2330  if ( (*it)->GetQual() == "tag_peptide" ) {
2332  new CFlatStringQVal(
2333  (*it)->GetVal(), CFormatQual::eUnquoted ) );
2334  break;
2335  }
2336  }
2337  }
2338  }
2339  break;
2340  }
2343  if ( ! rna.IsSetExt() ) {
2344  break;
2345  }
2346  const CRNA_ref_Base::TExt& ext = rna.GetExt();
2347  if ( ext.IsName() ) {
2348  string strName = ext.GetName();
  // the placeholder name "misc_RNA" is not emitted as a product
2349  if ( strName != "misc_RNA" ) {
2350  x_AddQual( eFQ_product, new CFlatStringQVal( strName ) );
2351  }
2352  }
2353  break;
2354  }
2355  default:
2356  if ( rna.IsSetExt() && rna.GetExt().IsName() ) {
2357  x_AddQual( eFQ_product, new CFlatStringQVal( rna.GetExt().GetName() ) );
2358  }
2359  break;
2360  }
2361  } // end of switch
2362 
2363  // some things to extract from RNA-gen
2364  if( rna.IsSetExt() && rna.GetExt().IsGen() ) {
2365  const CRNA_gen &gen = rna.GetExt().GetGen();
2366  if ( gen.IsSetClass() ) {
  // a class that is not legal is replaced by "other"; the original class
  // string is still passed to a second qualifier value below
2367  if (gen.IsLegalClass()) {
2369  new CFlatStringQVal( gen.GetClass() ) );
2370  } else {
2372  new CFlatStringQVal( "other" ));
2374  new CFlatStringQVal( gen.GetClass() ) );
2375  }
2376  }
2377 
  // the RNA-gen product is used only if no product qualifier was added above
2378  if ( gen.IsSetProduct() && ! x_HasQual(eFQ_product) ) {
2380  new CFlatStringQVal( gen.GetProduct() ) );
2381  }
2382  }
2383 }
2384 
2385 // ----------------------------------------------------------------------------
2387  CBioseq_Handle& bsh,
2389  bool pseudo )
//
//  Adds eFQ_translation. Nothing is added for pseudo features or when
//  cfg.NeverTranslateCDS() is set. The protein string is either computed
//  (AlwaysTranslateCDS, or TranslateIfNoProduct with an invalid product
//  handle) or read from the product Bioseq `bsh`; a blank result adds nothing.
//  NOTE(review): the signature's first line, the context parameter, the start
//  of the translate call and the declaration of `coding` are not visible in
//  this listing.
2390 // ----------------------------------------------------------------------------
2391 {
2392  const CFlatFileConfig& cfg = ctx.Config();
2393  CScope& scope = ctx.GetScope();
2394 
2395  if ( pseudo || cfg.NeverTranslateCDS() ) {
2396  return;
2397  }
2398 
2399  string translation;
2400  if ( cfg.AlwaysTranslateCDS() || (cfg.TranslateIfNoProduct() && !bsh) ) {
2402  translation, false /* don't include stops */);
2403  }
2404  else if ( bsh ) {
2405  CSeqVector seqv = bsh.GetSeqVector();
2406  /*
2407  CSeq_data::E_Choice coding = cfg.IupacaaOnly() ?
2408  CSeq_data::e_Iupacaa : CSeq_data::e_Ncbieaa;
2409  */
2411  seqv.SetCoding( coding );
2412 
2413  try {
2414  // an exception can occur here if the specified length doesn't match the actual length.
2415  // Although I don't know of any released .asn files with this problem, it can occur
2416  // in submissions.
2417  seqv.GetSeqData( 0, seqv.size(), translation );
2418  } catch( const CException & ) {
2419  // we're unable to do the translation
2420  translation.clear();
2421  }
2422  }
2423 
  // an empty or whitespace-only translation produces no qualifier
2424  if (!NStr::IsBlank(translation)) {
2425  x_AddQual(eFQ_translation, new CFlatStringQVal( translation ) );
2426  }
2427 }
2428 
2429 // ----------------------------------------------------------------------------
2431  const CCdregion& cdr,
2432  CBioseqContext& ctx )
//
//  Handles the genetic code id of a coding region. Returns without action
//  when no code is set or when its id is 255. Otherwise the guarded statement
//  runs for GBSeq / INSDSeq formats, or for any format when the id is above 1.
//  NOTE(review): the signature's first line and the guarded statement are not
//  visible in this listing.
2433 // ----------------------------------------------------------------------------
2434 {
2435  if ( ! cdr.IsSetCode() ) {
2436  return;
2437  }
2438  int gcode = cdr.GetCode().GetId();
2439  if ( gcode == 255 ) {
2440  return;
2441  }
2442  if ( ctx.Config().IsFormatGBSeq() || ctx.Config().IsFormatINSDSeq() || gcode > 1 ) {
2444  }
2445 }
2446 
2447 // ----------------------------------------------------------------------------
2449  const CCdregion& cdr,
2450  CBioseqContext& ctx )
//
//  Adds eFQ_codon_start with the coding region's reading frame; an unset
//  frame is treated as frame one.
//  NOTE(review): the signature's first line is not visible in this listing.
2451 // ----------------------------------------------------------------------------
2452 {
2453  CCdregion::TFrame frame = cdr.GetFrame();
2454  if (frame == CCdregion::eFrame_not_set)
2455  frame = CCdregion::eFrame_one;
2456 
2457  // codon_start qualifier is always shown for nucleotides and for proteins mapped
2458  // from cDNA, otherwise only when the frame is not 1.
  // NOTE(review): as written, the condition omits the qualifier only when all
  // three hold: protein context, IsMappedFromCDNA(), and frame one. That does
  // not match the wording above for proteins mapped from cDNA; confirm which
  // is intended.
2459  if ( !ctx.IsProt() || !IsMappedFromCDNA() || frame != CCdregion::eFrame_one ) {
2460  x_AddQual( eFQ_codon_start, new CFlatIntQVal( frame ) );
2461  }
2462 }
2463 
2464 // ----------------------------------------------------------------------------
2466  const CCdregion& cdr,
2468  const int inset )
//
//  Variant of the codon_start handling that first rotates the frame by
//  `inset`. An unset frame is treated as frame one. Then:
//    inset == 1 : one -> three, two -> one,   three -> two
//    inset == 2 : one -> two,   two -> three, three -> one
//  Any other inset value leaves the frame unchanged.
//  NOTE(review): the signature's first line and the context parameter are not
//  visible in this listing.
2469 // ----------------------------------------------------------------------------
2470 {
2471  CCdregion::TFrame frame = cdr.GetFrame();
2472  if (frame == CCdregion::eFrame_not_set) {
2473  frame = CCdregion::eFrame_one;
2474  }
2475 
2476  if (inset == 1) {
2477  if (frame == CCdregion::eFrame_one) {
2478  frame = CCdregion::eFrame_three;
2479  } else if (frame == CCdregion::eFrame_two) {
2480  frame = CCdregion::eFrame_one;
2481  } else if (frame == CCdregion::eFrame_three) {
2482  frame = CCdregion::eFrame_two;
2483  }
2484  } else if (inset == 2) {
2485  if (frame == CCdregion::eFrame_one) {
2486  frame = CCdregion::eFrame_two;
2487  } else if (frame == CCdregion::eFrame_two) {
2488  frame = CCdregion::eFrame_three;
2489  } else if (frame == CCdregion::eFrame_three) {
2490  frame = CCdregion::eFrame_one;
2491  }
2492  }
2493 
2494  // codon_start qualifier is always shown for nucleotides and for proteins mapped
2495  // from cDNA, otherwise only when the frame is not 1.
  // NOTE(review): as written, the qualifier is omitted only when the context
  // is protein, IsMappedFromCDNA() is true, and the (rotated) frame is one.
2496  if ( !ctx.IsProt() || !IsMappedFromCDNA() || frame != CCdregion::eFrame_one ) {
2497  x_AddQual( eFQ_codon_start, new CFlatIntQVal( frame ) );
2498  }
2499 }
2500 
2501 // ----------------------------------------------------------------------------
     // Emits a qualifier built from the coding region's code-breaks.
     //   cdr - coding region; only used when IsSetCode_break() is true
     //   ctx - bioseq context (queried for IsProt())
     // Nothing is done in a protein context when the feature is mapped from cDNA.
     // NOTE(review): listing lines 2502 (signature) and 2509 (the start of the
     // call that receives the CFlatCodeBreakQVal) are missing from this extract;
     // the qualifier slot is therefore not visible here.
2503  const CCdregion& cdr,
2504  CBioseqContext& ctx )
2505 // ----------------------------------------------------------------------------
2506 {
2507  if ( !ctx.IsProt() || !IsMappedFromCDNA() ) {
2508  if ( cdr.IsSetCode_break() ) {
2510  new CFlatCodeBreakQVal( cdr.GetCode_break() ) );
2511  }
2512  }
2513 }
2514 
2515 // ----------------------------------------------------------------------------
     // Variant of the translation-exception handling that also receives the raw
     // transl_except text.
     //   cdr   - coding region; its code-breaks take precedence when set
     //   tr_ex - raw text; when no code-break is set and this is non-empty it is
     //           reported in a note prefixed "unprocessed translation exception: "
     // Nothing is done in a protein context when the feature is mapped from cDNA.
     // NOTE(review): listing lines 2516 (signature), 2518 (presumably the `ctx`
     // parameter) and 2524 (the start of the call that receives the
     // CFlatCodeBreakQVal) are missing from this extract.
2517  const CCdregion& cdr,
2519  string& tr_ex )
2520 // ----------------------------------------------------------------------------
2521 {
2522  if ( !ctx.IsProt() || !IsMappedFromCDNA() ) {
2523  if ( cdr.IsSetCode_break() ) {
2525  new CFlatCodeBreakQVal( cdr.GetCode_break() ) );
2526  } else if ( tr_ex.length() > 0 ) {
2527  x_AddQual(eFQ_seqfeat_note, new CFlatStringQVal("unprocessed translation exception: " + tr_ex));
2528  }
2529  }
2530 }
2531 
2532 // ----------------------------------------------------------------------------
     // Adds the eFQ_prot_conflict qualifier with a fixed message when the coding
     // region is flagged as conflicting.
     //   cdr - coding region; the conflict flag must be both set and true
     //   ctx - bioseq context (IsProt() and the scope are used)
     // The qualifier is only added when the context is not "protein mapped from
     // cDNA" and the feature's product has an id and a length greater than zero.
     // NOTE(review): the signature line (listing line 2533) is missing from this
     // extract; presumably x_AddQualProteinConflict -- confirm.
2534  const CCdregion& cdr,
2535  CBioseqContext& ctx )
2536 // ----------------------------------------------------------------------------
2537 {
2538  static const string conflict_msg =
2539  "Protein sequence is in conflict with the conceptual translation";
2540 
2541  const bool conflict_set = (cdr.IsSetConflict() && cdr.GetConflict());
2542 
2543  if (conflict_set)
2544  {
2545  if (!ctx.IsProt() || !IsMappedFromCDNA()) {
     // has_prot stays false unless the product's length can be shown to be > 0
2546  bool has_prot = false;
2547  if (m_Feat.IsSetProduct() && m_Feat.GetProduct().GetId()) {
2548  has_prot = (sequence::GetLength(m_Feat.GetProduct(), &ctx.GetScope()) > 0);
2549  }
2550  if (has_prot) {
2551  x_AddQual(eFQ_prot_conflict, new CFlatStringQVal(conflict_msg));
2552  }
2553  }
2554  }
2555 }
2556 
2557 // ----------------------------------------------------------------------------
     // Acts only in a protein context (ctx.IsProt()).
     // The commented-out condition below shows that an additional
     // IsMappedFromCDNA() requirement was once used and has been dropped.
     // NOTE(review): listing lines 2558 (signature) and 2564 (the statement
     // inside the `if`) are missing from this extract; presumably the missing
     // statement adds a coded_by qualifier -- confirm.
2559  CBioseqContext& ctx )
2560 // ----------------------------------------------------------------------------
2561 {
2562  //if ( ctx.IsProt() && IsMappedFromCDNA() ) {
2563  if ( ctx.IsProt() ) {
2565  }
2566 }
2567 
2568 // ----------------------------------------------------------------------------
     // Adds the eFQ_prot_comment qualifier from a comment descriptor found via
     // the protein handle.
     //   protHandle - protein bioseq handle; a null handle is a no-op
     // Only the descriptor the iterator is positioned on after construction is
     // used, and only if its comment is non-empty. The text is copied, trimmed
     // with TrimSpacesAndJunkFromEnds and has a trailing period removed.
     // NOTE(review): the signature line (listing line 2569) is missing from this
     // extract.
2570  const CBioseq_Handle& protHandle )
2571 // ----------------------------------------------------------------------------
2572 {
2573  if ( ! protHandle ) {
2574  return;
2575  }
     // NOTE(review): the third argument (1) is presumably a search-depth limit
     // for the descriptor iterator -- confirm against CSeqdesc_CI.
2576  CSeqdesc_CI comm( protHandle, CSeqdesc::e_Comment, 1 );
2577  if ( comm && !comm->GetComment().empty() ) {
2578  string comment = comm->GetComment();
2579 
2580  TrimSpacesAndJunkFromEnds( comment, true );
     // the return value of RemovePeriodFromEnd is deliberately ignored; the
     // SetAddPeriod() follow-up is disabled (see commented-out code below)
2581  /* const bool bAddPeriod = */ RemovePeriodFromEnd( comment, true );
2582  CFlatStringQVal *commentQVal = new CFlatStringQVal( comment );
2583  /* if( bAddPeriod ) {
2584  commentQVal->SetAddPeriod();
2585  } */
2586  x_AddQual( eFQ_prot_comment, commentQVal );
2587  }
2588 }
2589 
2590 // ----------------------------------------------------------------------------
     // Emits a "Method: <tech>" string derived from the MolInfo descriptor found
     // via the protein handle.
     //   protHandle - protein bioseq handle; a null handle is a no-op
     // The technique must be greater than eTech_standard, must not be
     // eTech_concept_trans or eTech_concept_trans_a, and GetTechString() must
     // return a non-empty string for it.
     // NOTE(review): listing lines 2591 (signature) and 2605 (the start of the
     // call that receives the "Method: " string) are missing from this extract,
     // so the qualifier slot is not visible here.
2592  const CBioseq_Handle& protHandle )
2593 // ----------------------------------------------------------------------------
2594 {
2595  if ( ! protHandle ) {
2596  return;
2597  }
2598  CSeqdesc_CI mi( protHandle, CSeqdesc::e_Molinfo );
2599  if ( mi ) {
2600  CMolInfo::TTech prot_tech = mi->GetMolinfo().GetTech();
2601  if ( prot_tech > CMolInfo::eTech_standard &&
2602  prot_tech != CMolInfo::eTech_concept_trans &&
2603  prot_tech != CMolInfo::eTech_concept_trans_a ) {
2604  if ( !GetTechString( prot_tech ).empty() ) {
2606  "Method: " + GetTechString( prot_tech) ) );
2607  }
2608  }
2609  }
2610 }
2611 
2612 // ----------------------------------------------------------------------------
     // Index-based lookup of the protein information that belongs to this
     // feature's product.
     //   protHandle - in/out: replaced by the scope lookup of the product id when
     //                an id exists and cfg.AlwaysTranslateCDS() is false
     //   protRef    - out: Prot-ref of the protein feature found, else nullptr
     //   protFeat   - out: mapped protein feature taken from the index entry
     //   protId     - out: reset to m_Feat.GetProduct().GetId()
     // Falls back to x_GetAssociatedProtInfo() when the index has no entry for
     // the protein handle.
     // NOTE(review): listing lines 2613-2614 (signature and presumably the `ctx`
     // parameter), 2627 (declaration of `get_flag`) and 2648 (declaration of
     // `pfx`) are missing from this extract.
2615  CBioseq_Handle& protHandle,
2616  const CProt_ref*& protRef,
2617  CMappedFeat& protFeat,
2618  CConstRef<CSeq_id>& protId )
2619 // ----------------------------------------------------------------------------
2620 {
2621  const CFlatFileConfig& cfg = ctx.Config();
2622  CScope& scope = ctx.GetScope();
2623 
2624  protId.Reset( m_Feat.GetProduct().GetId() );
2625  if ( protId ) {
2626  if ( !cfg.AlwaysTranslateCDS() ) {
     // any of these settings switches the lookup flag to eGetBioseq_All
2628  if ( cfg.ShowFarTranslations() || ctx.IsGED() || ctx.IsRefSeq() || cfg.IsPolicyFtp() || cfg.IsPolicyGenomes() ) {
2629  get_flag = CScope::eGetBioseq_All;
2630  }
2631  protHandle = scope.GetBioseqHandle(*protId, get_flag);
2632  }
2633  }
2634 
     // NOTE(review): these early returns happen before protRef is reset below,
     // so protRef keeps whatever value the caller passed in.
2635  CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
2636  if (! idx) return;
2637  CBioseq_Handle hdl = ctx.GetHandle();
2638  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
2639  if (! bsx) return;
2640 
2641 
2642  protRef = nullptr;
2643  if ( protHandle ) {
     // `idx` and `bsx` below shadow the outer variables of the same names;
     // this `bsx` is the index entry for the protein, not for ctx.GetHandle()
2644  CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
2645  if (! idx) return;
2646  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (protHandle);
2647  if (bsx) {
2649  if (pfx) {
2650  protFeat = pfx->GetMappedFeat();
2651  if ( protFeat ) {
2652  protRef = &( protFeat.GetData().GetProt() );
2653  }
2654  }
2655  } else {
     // protein not covered by the index: use the non-indexed lookup
2656  x_GetAssociatedProtInfo(ctx, protHandle, protRef, protFeat, protId);
2657  }
2658  }
2659 }
2660 
2661 // ----------------------------------------------------------------------------
     // Looks up the protein information that belongs to this feature's product.
     //   protHandle - in/out: replaced by the scope lookup of the product id when
     //                an id exists and cfg.AlwaysTranslateCDS() is false
     //   protRef    - out: Prot-ref of the best protein feature, else nullptr
     //   protFeat   - out: result of s_GetBestProtFeature( protHandle )
     //   protId     - out: reset to m_Feat.GetProduct().GetId()
     // NOTE(review): listing lines 2662-2663 (signature and presumably the `ctx`
     // parameter) and 2676 (declaration of `get_flag`) are missing from this
     // extract.
2664  CBioseq_Handle& protHandle,
2665  const CProt_ref*& protRef,
2666  CMappedFeat& protFeat,
2667  CConstRef<CSeq_id>& protId )
2668 // ----------------------------------------------------------------------------
2669 {
2670  const CFlatFileConfig& cfg = ctx.Config();
2671  CScope& scope = ctx.GetScope();
2672 
2673  protId.Reset( m_Feat.GetProduct().GetId() );
2674  if ( protId ) {
2675  if ( !cfg.AlwaysTranslateCDS() ) {
     // any of these settings switches the lookup flag to eGetBioseq_All
2677  if ( cfg.ShowFarTranslations() || ctx.IsGED() || ctx.IsRefSeq() || cfg.IsPolicyFtp() || cfg.IsPolicyGenomes() ) {
2678  get_flag = CScope::eGetBioseq_All;
2679  }
2680  protHandle = scope.GetBioseqHandle(*protId, get_flag);
2681  }
2682  }
2683 
2684  protRef = nullptr;
2685  if ( protHandle ) {
2686  protFeat = s_GetBestProtFeature( protHandle );
2687  if ( protFeat ) {
2688  protRef = &( protFeat.GetData().GetProt() );
2689  }
2690  }
2691 }
2692 
2693 // ----------------------------------------------------------------------------
     // Adds the eFQ_prot_note qualifier from the protein feature's comment.
     //   protRef  - Prot-ref of the protein feature; nullptr is a no-op
     //   protFeat - protein feature whose comment is used
     // The comment is copied, trimmed with TrimSpacesAndJunkFromEnds and has a
     // trailing period removed before it is added.
     // NOTE(review): listing lines 2694 (signature) and 2704 (the second half of
     // the `processed` condition) are missing from this extract, so the full
     // condition is not visible here.
2695  const CProt_ref* protRef,
2696  const CMappedFeat& protFeat )
2697 // ----------------------------------------------------------------------------
2698 {
2699  if ( ! protRef ) {
2700  return;
2701  }
2702  if ( protFeat.IsSetComment() ) {
2703  if ( protRef->GetProcessed() == CProt_ref::eProcessed_not_set ||
2705  string prot_note = protFeat.GetComment();
2706  TrimSpacesAndJunkFromEnds( prot_note, true );
2707  RemovePeriodFromEnd( prot_note, true );
2708  x_AddQual( eFQ_prot_note, new CFlatStringQVal( prot_note ) );
2709  }
2710  }
2711 }
2712 
2713 
2714 // ----------------------------------------------------------------------------
     // Adds eFQ_protein_id (and GI-based eFQ_db_xref) qualifiers for the product
     // protein.
     //   protHandle - protein bioseq handle; when valid, its Seq-ids are used
     //   protId     - product Seq-id; used only when protHandle is null
     // With a valid handle, every id on the bioseq is examined; without one, an
     // accession and/or GI is derived from protId alone.
     // NOTE(review): listing lines 2715-2716 (signature and presumably the `ctx`
     // parameter) and 2777 (the first case label(s) of the switch) are missing
     // from this extract.
2717  const CBioseq_Handle& protHandle,
2718  CConstRef<CSeq_id> protId )
2719 // ----------------------------------------------------------------------------
2720 {
2721  if ( protHandle ) {
2722  CConstRef<CBioseq> pBioseq( protHandle.GetCompleteBioseq() );
2723 
2724  // extract the *one* usable general seq-id (if there is one)
2725  // (the loop sets pTheOneUsableGeneralSeqId, or leaves it NULL
2726  // if there is zero or more than one usable general seqids)
2727  CConstRef<CSeq_id> pTheOneUsableGeneralSeqId;
2728  FOR_EACH_SEQID_ON_BIOSEQ(seqid_ci, *pBioseq) {
2729  const CSeq_id & seqid = **seqid_ci;
2730  if( ! seqid.IsGeneral() ) {
2731  // not just general, so ignore all of them
2732  pTheOneUsableGeneralSeqId.Reset();
2733  break;
2734  }
2735 
2736  const CDbtag & db_tag = seqid.GetGeneral();
2737 
2738  // db types to ignore
     // NOTE(review): CStaticArraySet presumably requires this array to be
     // sorted under the PNocase comparison; keep it sorted when editing.
2739  static const char* const sc_IgnoredDbs[] = {
2740  "BankIt",
2741  "NCBIFILE",
2742  "PID",
2743  "SMART",
2744  "TMSMART",
2745  };
2746  typedef CStaticArraySet<const char*, PNocase> TIgnoredDbSet;
2747  DEFINE_STATIC_ARRAY_MAP(TIgnoredDbSet, sc_IgnoredDbSet, sc_IgnoredDbs );
2748 
2749  // get db and tag
2750  const string & sDb = GET_STRING_FLD_OR_BLANK(db_tag, Db);
2751  string sTag;
2752  if( FIELD_IS_SET(db_tag, Tag) ) {
2753  stringstream sTagStrm;
2754  db_tag.GetTag().AsString(sTagStrm);
2755  // swap faster than assignment
2756  sTagStrm.str().swap(sTag);
2757  }
2758 
     // usable = non-empty db and tag, and db not in the ignored set
2759  if( ! sDb.empty() && ! sTag.empty() &&
2760  sc_IgnoredDbSet.find(sDb.c_str()) == sc_IgnoredDbSet.end() )
2761  {
2762  if( pTheOneUsableGeneralSeqId ) {
2763  // more than one, so ignore all of them
2764  pTheOneUsableGeneralSeqId.Reset();
2765  break;
2766  } else {
2767  pTheOneUsableGeneralSeqId = *seqid_ci;
2768  }
2769  }
2770  }
2771 
     // second pass: emit qualifiers; eLastRegularChoice records the type of the
     // most recent id handled by the first case group
2772  CSeq_id::E_Choice eLastRegularChoice = CSeq_id::e_not_set;
2773  FOR_EACH_SEQID_ON_BIOSEQ(seqid_ci, *pBioseq) {
2774  const CSeq_id & seqid = **seqid_ci;
2775 
2776  switch( seqid.Which() ) {
2778  case CSeq_id::e_Other:
2779  case CSeq_id::e_Tpg: case CSeq_id::e_Tpe: case CSeq_id::e_Tpd:
2780  case CSeq_id::e_Gpipe:
2781  x_AddQual( eFQ_protein_id, new CFlatSeqIdQVal( seqid ) );
2782  eLastRegularChoice = seqid.Which();
2783  break;
2784 
2785  case CSeq_id::e_Gi:
2786  if( seqid.GetGi() > ZERO_GI ) {
2787  const CFlatFileConfig& cfg = GetContext()->Config();
     // GI output is suppressed entirely by HideGI and the Ftp/Genomes policies
2788  if (! (cfg.HideGI() || cfg.IsPolicyFtp() || cfg.IsPolicyGenomes())) {
2789  if ( eLastRegularChoice == CSeq_id::e_not_set ) {
2790  // use as protein_id if it's the first usable one
2791  x_AddQual( eFQ_protein_id, new CFlatSeqIdQVal( seqid ) );
2792  }
2793  x_AddQual( eFQ_db_xref, new CFlatSeqIdQVal( seqid, true ) );
2794  }
2795  }
2796  break;
2797 
2798  case CSeq_id::e_General:
2799  // show it if it's the *one* usable general seqid. otherwise, ignore
2800  if( *seqid_ci == pTheOneUsableGeneralSeqId ) {
2801  x_AddQual( eFQ_protein_id, new CFlatSeqIdQVal( seqid ) );
2802  }
2803  break;
2804 
2805  default:
2806  // ignore other types
2807  break;
2808  }
2809  }
2810  } else if( protId ) {
2811 
     // no usable handle: derive accession and GI from the product id alone
2812  TGi gi = ZERO_GI;
2813  string prot_acc;
2814 
2815  // get gi and prot_acc
2816  if ( protId->IsGi() ) {
2817  gi = protId->GetGi();
2818  if( gi > ZERO_GI ) {
2819  try {
2820  prot_acc = GetAccessionForGi( gi, ctx.GetScope() );
     // lookup failure is tolerated: prot_acc simply stays empty
2821  } catch ( CException& ) {}
2822  }
2823  } else {
2824 
2825  // swap is faster than assignment
2826  // protId->GetSeqIdString(true).swap( prot_acc );
2827  prot_acc = protId->GetSeqIdString(true);
2828 
2829  // find prot_acc and gi
2830  //const CTextseq_id* pTextSeq_id = protId->GetTextseq_Id();
2831  //if( pTextSeq_id ) {
2832  // stringstream protAccStrm;
2833  // pTextSeq_id->AsFastaString(protAccStrm);
2834  // // swap is faster than assignment
2835  // protAccStrm.str().swap( prot_acc );
2836 
2837  //}
2838  try {
2839  gi = ctx.GetScope().GetGi( CSeq_id_Handle::GetHandle(*protId) );
2840  } catch(CException &) {
2841  // could not get gi
2842  }
2843  }
2844 
2845  if( ! prot_acc.empty() ) {
     // when DropIllegalQuals() is on, only valid accessions are emitted
2846  if ( ! ctx.Config().DropIllegalQuals() || IsValidAccession( prot_acc ) ) {
2847  try {
2848  CRef<CSeq_id> acc_id( new CSeq_id( prot_acc ) );
2849  x_AddQual( eFQ_protein_id, new CFlatSeqIdQVal( *acc_id ) );
2850  } catch( CException & ) {
     // the string could not be turned into a Seq-id: emit it verbatim
2851  x_AddQual( eFQ_protein_id, new CFlatStringQVal(prot_acc) );
2852  }
2853  }
2854  }
2855 
2856  if( gi > ZERO_GI ) {
     // reuse protId when it already is a GI, otherwise build a GI Seq-id
2857  CConstRef<CSeq_id> pGiSeqId(
2858  protId->IsGi() ?
2859  protId.GetPointer() :
2860  new CSeq_id(CSeq_id::e_Gi, gi) );
2861  x_AddQual( eFQ_db_xref, new CFlatSeqIdQVal( *pGiSeqId, true ) );
2862  }
2863  }
2864 }
2865 
2866 // ----------------------------------------------------------------------------
     // Emits qualifiers from the names of a Prot-ref.
     //   protRef - Prot-ref supplying the names; nullptr is a no-op
     // Nothing is emitted when the name list is empty. Outside dump mode the
     // first name is wrapped in a CFlatStringQVal and names beyond the first
     // get separate handling; dump mode takes the `else` branch.
     // NOTE(review): listing lines 2867-2868 (signature and presumably the `ctx`
     // parameter), 2880, 2883-2884 and 2888-2889 are missing from this extract,
     // so the qualifier slots and the handling of additional names are not
     // visible here.
2869  const CProt_ref* protRef )
2870 // ----------------------------------------------------------------------------
2871 {
2872  if ( !protRef ) {
2873  return;
2874  }
2875 
2876  const CFlatFileConfig& cfg = ctx.Config();
2877  const CProt_ref::TName& names = protRef->GetName();
2878  if ( !names.empty() ) {
2879  if ( ! cfg.IsModeDump() ) {
2881  new CFlatStringQVal( names.front() ) );
2882  if ( names.size() > 1 ) {
2885  }
2886 
2887  } else {
2890  }
2891  }
2892  }
2893 }
2894 
2895 // ----------------------------------------------------------------------------
     // Adds the eFQ_prot_desc qualifier from the Prot-ref description.
     //   protRef - Prot-ref supplying the description; a null pointer or an
     //             unset description is a no-op
     // The description is copied and trimmed; if RemovePeriodFromEnd reports
     // true, the qualifier value is marked with SetAddPeriod().
     // NOTE(review): the signature line (listing line 2896) is missing from this
     // extract.
2897  const CProt_ref* protRef )
2898 // ----------------------------------------------------------------------------
2899 {
2900  if ( !protRef || !protRef->IsSetDesc() ) {
2901  return;
2902  }
2903 
2904  string desc = protRef->GetDesc();
2905  TrimSpacesAndJunkFromEnds( desc, true );
2906  bool add_period = RemovePeriodFromEnd( desc, true );
2907  CRef<CFlatStringQVal> prot_desc( new CFlatStringQVal( desc ) );
2908  if ( add_period ) {
2909  prot_desc->SetAddPeriod();
2910  }
2911  x_AddQual( eFQ_prot_desc, prot_desc );
2912 }
2913 
2914 // ----------------------------------------------------------------------------
     // Iterates over the activities of a Prot-ref.
     //   protRef - Prot-ref supplying the activities; a null pointer or an empty
     //             activity list is a no-op
     // NOTE(review): listing lines 2915 (signature) and 2923 (the loop body) are
     // missing from this extract; presumably each activity is added as a
     // qualifier -- confirm.
2916  const CProt_ref* protRef )
2917 // ----------------------------------------------------------------------------
2918 {
2919  if ( !protRef || protRef->GetActivity().empty() ) {
2920  return;
2921  }
2922  ITERATE (CProt_ref::TActivity, it, protRef->GetActivity()) {
2924  }
2925 }
2926 
2927 // ----------------------------------------------------------------------------
     // Iterates over the EC numbers of a Prot-ref.
     //   protRef - Prot-ref supplying the EC numbers; a null pointer, an unset
     //             or an empty EC list is a no-op
     // An EC number is accepted when cfg.DropIllegalQuals() is off or when
     // s_IsLegalECNumber() approves it.
     // NOTE(review): listing lines 2928-2929 (signature and presumably the `ctx`
     // parameter) and 2940 (the statement run for accepted EC numbers) are
     // missing from this extract.
2930  const CProt_ref* protRef )
2931 // ----------------------------------------------------------------------------
2932 {
2933  if ( !protRef || !protRef->IsSetEc() || protRef->GetEc().empty() ) {
2934  return;
2935  }
2936 
2937  const CFlatFileConfig& cfg = ctx.Config();
2938  ITERATE(CProt_ref::TEc, ec, protRef->GetEc()) {
2939  if ( !cfg.DropIllegalQuals() || s_IsLegalECNumber( *ec ) ) {
2941  }
2942  }
2943 }
2944 
2945 // ----------------------------------------------------------------------------
     // Index-based assembly of the qualifiers for a coding-region feature.
     //   cds    - the coding-region feature
     //   pseudo - pseudo flag passed on to x_AddQualTranslation; forced to false
     //            for EMBL and DDBJ contexts
     // Returns early, adding nothing, when no seq-entry index or no index entry
     // for the context's bioseq is available.
     // NOTE(review): listing lines 2946 (signature), 2948 (presumably the `ctx`
     // parameter), 3027, 3038 (declaration of `prot`) and 3041-3042 are missing
     // from this extract.
2947  const CMappedFeat& cds,
2949  bool pseudo)
2950 // ----------------------------------------------------------------------------
2951 {
2952  CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
2953  if (! idx) return;
2954  CBioseq_Handle hdl = ctx.GetHandle();
2955  CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
2956  if (! bsx) return;
2957 
2958  if ( ctx.IsEMBL() || ctx.IsDDBJ() ) {
2959  pseudo = false;
2960  }
2961 
2962  const CCdregion& cdr = cds.GetData().GetCdregion();
2963 
2964  // const CSeq_loc& cdsloc = cds.GetLocation();
2965  const CSeq_loc& orgloc = cds.GetOriginalFeature().GetLocation();
2966  const CSeq_loc& bsploc = ctx.GetLocation();
2967 
2968  // cerr << "CDS " << MSerial_AsnText << cdsloc;
2969  // cerr << "ORG " << MSerial_AsnText << orgloc;
2970  // cerr << "BSP " << MSerial_AsnText << bsploc;
2971 
     // `inset` is the offset of the context interval's starting position
     // (GetTo() on the minus strand, GetFrom() otherwise) within the original
     // feature location; it is only computed when the context location is a
     // single interval and not the whole sequence.
2972  int inset = 0;
2973  if ( ! ctx.GetLocation().IsWhole()) {
2974  if (bsploc.IsInt()) {
2975  const CSeq_interval& bspint = bsploc.GetInt();
2976  if ( orgloc.IsSetStrand() && orgloc.GetStrand() == eNa_strand_minus ) {
     // this `hdl` shadows the outer variable of the same name
2977  CBioseq_Handle& hdl = ctx.GetHandle();
2978  if (hdl) {
2979  int pos = bspint.GetTo();
2980  // cerr << "PS " << pos << endl;
     // NOTE(review): `bid` is dereferenced below without a null check
2981  const CSeq_id* bid = bsploc.GetId();
2982  ENa_strand strand = eNa_strand_minus;
2983  CSeq_id& cid = const_cast<CSeq_id&>(*bid);
2984  CConstRef<CSeq_loc> newloc(new CSeq_loc(cid, pos, pos, strand));
2985  // cerr << "NEW " << MSerial_AsnText << newloc;
2986  inset = sequence::LocationOffset(orgloc, *newloc, eOffset_FromStart, &ctx.GetScope());
2987  // cerr << "IS " << inset << endl;
2988  }
2989  } else {
2990  int pos = bspint.GetFrom();
2991  // cerr << "PS " << pos << endl;
     // NOTE(review): `bid` is dereferenced below without a null check
2992  const CSeq_id* bid = bsploc.GetId();
2993  ENa_strand strand = eNa_strand_plus;
2994  CSeq_id& cid = const_cast<CSeq_id&>(*bid);
2995  CConstRef<CSeq_loc> newloc(new CSeq_loc(cid, pos, pos, strand));
2996  // cerr << "NEW " << MSerial_AsnText << newloc;
2997  inset = sequence::LocationOffset(orgloc, *newloc, eOffset_FromStart, &ctx.GetScope());
2998  // cerr << "IS " << inset << endl;
2999  }
3000  }
3001  }
     // a negative offset is treated as 0; only the remainder modulo 3 is used
3002  if (inset < 0) {
3003  inset = 0;
3004  }
3005  inset = (inset % 3);
3006 
3007  const CProt_ref* protRef = nullptr;
3008  CMappedFeat protFeat;
3009  CConstRef<CSeq_id> prot_id;
3010 
     // tr_ex: value of the first GBQual named "transl_except" (case-insensitive)
     // that has both qual and val set
3011  string tr_ex;
3012  for (auto& gbqual : cds.GetQual()) {
3013  if (!gbqual->IsSetQual() || !gbqual->IsSetVal()) continue;
3014  if (NStr::CompareNocase( gbqual->GetQual(), "transl_except") != 0) continue;
3015  tr_ex = gbqual->GetVal ();
3016  break;
3017  }
     // drop every eFQ_transl_except entry already present in m_Quals
3018  TQI it = m_Quals.begin();
3019  while ( it != m_Quals.end() ) {
3020  if ( it->first == eFQ_transl_except ) {
3021  it = m_Quals.Erase(it);
3022  } else {
3023  ++it;
3024  }
3025  }
3026 
3028  x_AddQualCodonStartIdx( cdr, ctx, inset );
3029  x_AddQualTranslationExceptionIdx( cdr, ctx, tr_ex );
3030  x_AddQualProteinConflict( cdr, ctx );
3031  x_AddQualCodedBy( ctx );
     // protein context mapped from cDNA: no protein qualifiers are added
3032  if ( ctx.IsProt() && IsMappedFromCDNA() ) {
3033  return;
3034  }
3035 
3036  // protein qualifiers
3037  if (m_Feat.IsSetProduct()) {
3039  ctx.GetScope().GetBioseqHandle(m_Feat.GetProductId());
3040  x_GetAssociatedProtInfoIdx( ctx, prot, protRef, protFeat, prot_id );
3043  x_AddQualProtNote( protRef, protFeat );
3044  x_AddQualProteinId( ctx, prot, prot_id );
3045  x_AddQualTranslation( prot, ctx, pseudo );
3046  }
3047 
3048  // add qualifiers where associated xref overrides the ref:
3049  const CProt_ref* protXRef = m_Feat.GetProtXref();
3050  if ( ! protXRef ) {
3051  protXRef = protRef;
3052  }
3053  x_AddQualCdsProduct( ctx, protXRef );
3054  x_AddQualProtDesc( protXRef );
3055  x_AddQualProtActivity( protXRef );
3056  x_AddQualProtEcNumber( ctx, protXRef );
3057 }
3058 
3059 // ----------------------------------------------------------------------------
     // Non-indexed assembly of the qualifiers for a coding-region feature.
     //   cds    - the coding-region feature
     //   pseudo - pseudo flag passed on to x_AddQualTranslation
     // NOTE(review): listing lines 3060 (signature), 3062 (presumably the `ctx`
     // parameter), 3072, 3074, 3083 (declaration of `prot`) and 3086-3087 are
     // missing from this extract.
3061  const CMappedFeat& cds,
3063  bool pseudo)
3064 // ----------------------------------------------------------------------------
3065 {
3066  const CCdregion& cdr = cds.GetData().GetCdregion();
3067 
3068  const CProt_ref* protRef = nullptr;
3069  CMappedFeat protFeat;
3070  CConstRef<CSeq_id> prot_id;
3071 
3073  x_AddQualCodonStart( cdr, ctx );
3075  x_AddQualProteinConflict( cdr, ctx );
3076  x_AddQualCodedBy( ctx );
     // protein context mapped from cDNA: no protein qualifiers are added
3077  if ( ctx.IsProt() && IsMappedFromCDNA() ) {
3078  return;
3079  }
3080 
3081  // protein qualifiers
3082  if (m_Feat.IsSetProduct()) {
3084  ctx.GetScope().GetBioseqHandle(m_Feat.GetProductId());
3085  x_GetAssociatedProtInfo( ctx, prot, protRef, protFeat, prot_id );
3088  x_AddQualProtNote( protRef, protFeat );
3089  x_AddQualProteinId( ctx, prot, prot_id );
3090  x_AddQualTranslation( prot, ctx, pseudo );
3091  }
3092 
3093  // add qualifiers where associated xref overrides the ref:
3094  const CProt_ref* protXRef = m_Feat.GetProtXref();
3095  if ( ! protXRef ) {
3096  protXRef = protRef;
3097  }
3098  x_AddQualCdsProduct( ctx, protXRef );
3099  x_AddQualProtDesc( protXRef );
3100  x_AddQualProtActivity( protXRef );
3101  x_AddQualProtEcNumber( ctx, protXRef );
3102 }
3103 
     // Returns CSeq_id::Score() for the Seq-id behind the given handle.
     //   idh - Seq-id handle to score
     // The const Seq-id obtained from the handle is re-wrapped in a non-const
     // CRef via const_cast because that is the argument form passed to
     // CSeq_id::Score() here.
3104 static int s_ScoreSeqIdHandle(const CSeq_id_Handle& idh)
3105 {
3106  CConstRef<CSeq_id> id = idh.GetSeqId();
3107  CRef<CSeq_id> id_non_const
3108  (const_cast<CSeq_id*>(id.GetPointer()));
3109  return CSeq_id::Score(id_non_const);
3110 }
3111 
3112 
     // NOTE(review): the signature (listing line 3113) and the declaration of
     // `tracker` (listing lines 3120-3121) are missing from this extract;
     // presumably this is s_FindBestIdChoice taking the id list `ids` -- confirm.
3114 {
3115  //
3116  // Objective:
3117  // Find the best choice among a given subset of id types. I.e. if a certain
3118  // id scores well but is not of a type we approve of, we still reject it.
3119  //
3122 
     // only ids of the approved types listed below are offered to the tracker;
     // every other type is skipped
3123  ITERATE( CBioseq_Handle::TId, it, ids ) {
3124  switch( (*it).Which() ) {
3125  case CSeq_id::e_Genbank:
3126  case CSeq_id::e_Embl:
3127  case CSeq_id::e_Ddbj:
3128  case CSeq_id::e_Gi:
3129  case CSeq_id::e_Other:
3130  case CSeq_id::e_General:
3131  case CSeq_id::e_Tpg:
3132  case CSeq_id::e_Tpe:
3133  case CSeq_id::e_Tpd:
3134  case CSeq_id::e_Gpipe:
3135  tracker(*it);
3136  break;
3137  default:
3138  break;
3139  }
3140  }
3141  return tracker.GetBestChoice();
3142 }
3143 
3144 // ---------------------------------------------------------------------------
     // Adds one id qualifier for the product bioseq into the given slot.
     //   prod - product bioseq handle; a null handle or an empty id list is a
     //          no-op
     //   slot - qualifier slot that receives the best id
     // NOTE(review): listing lines 3145 (signature) and 3174 (the start of the
     // call that receives the "GI:" string) are missing from this extract, so
     // the slot used for the GI value is not visible here.
3146  CBioseq_Handle& prod,
3147  EFeatureQualifier slot)
3148 // ---------------------------------------------------------------------------
3149 {
3150  //
3151  // Objective (according to the C toolkit):
3152  // We need one (and only one) /xxx_id tag. If there are multiple ids
3153  // on the product, only the best one (as chosen by s_FindBestIdChoice) is shown.
3154 
3155  if (!prod) {
3156  return;
3157  }
3158  const CBioseq_Handle::TId& ids = prod.GetId();
3159  if (ids.empty()) {
3160  return;
3161  }
3162 
3163  CSeq_id_Handle best = s_FindBestIdChoice(ids);
3164  if (!best) {
3165  return;
3166  }
3167  x_AddQual(slot, new CFlatSeqIdQVal(*best.GetSeqId()));
3168 
     // GI values are only considered for coding regions or in non-protein
     // contexts, and are suppressed by HideGI and the Ftp/Genomes policies
3169  if( m_Feat.GetData().IsCdregion() || ! GetContext()->IsProt() ) {
3170  const CFlatFileConfig& cfg = GetContext()->Config();
3171  ITERATE( CBioseq_Handle::TId, id_iter, ids ) {
3172  if( id_iter->IsGi() ) {
3173  if (! (cfg.HideGI() || cfg.IsPolicyFtp() || cfg.IsPolicyGenomes())) {
3175  new CFlatStringQVal("GI:" + NStr::NumericToString(id_iter->GetGi()) ));
3176  }
3177  }
3178  }
3179  }
3180 }
3181 
3182 // ----------------------------------------------------------------------------
     // Adds qualifiers for a region feature (asserted via IsRegion()).
     //   ctx - bioseq context (queried for IsProt())
     // An empty region string is a no-op. After the region qualifier itself,
     // user objects on the feature (Ext and Exts) are scanned for a
     // "cddScoreData" object carrying a "definition" field.
     // NOTE(review): listing lines 3183 (signature), 3198 (second half of the
     // protein-context condition), 3200 (the statement in the `if` branch) and
     // 3226 (the start of the call that receives the definition string) are
     // missing from this extract.
3184  CBioseqContext& ctx )
3185 // ----------------------------------------------------------------------------
3186 {
3187  _ASSERT( m_Feat.GetData().IsRegion() );
3188 
3189  //cerr << MSerial_AsnText << m_Feat.GetOriginalFeature();
3190 
3191  const CSeqFeatData& data = m_Feat.GetData();
3192  const string &region = data.GetRegion();
3193  if ( region.empty() ) {
3194  return;
3195  }
3196 
3197  if ( ctx.IsProt() &&
3199  {
3201  } else {
3202  x_AddQual(eFQ_region, new CFlatStringQVal("Region: " + region));
3203  }
3204 
3205  /// parse CDD data from the user object
3206  list< CConstRef<CUser_object> > objs;
3207  if (m_Feat.IsSetExt()) {
3208  objs.push_back(CConstRef<CUser_object>(&m_Feat.GetExt()));
3209  }
3210  if (m_Feat.IsSetExts()) {
3211  copy(m_Feat.GetExts().begin(), m_Feat.GetExts().end(),
3212  back_inserter(objs));
3213  }
3214 
3215  ITERATE (list< CConstRef<CUser_object> >, it, objs) {
3216  const CUser_object& obj = **it;
3217  bool found = false;
3218  if (obj.IsSetType() &&
3219  obj.GetType().IsStr() &&
3220  obj.GetType().GetStr() == "cddScoreData") {
3221  CConstRef<CUser_field> f = obj.GetFieldRef("definition");
3222  if (f) {
3223  CUser_field_Base::C_Data::TStr definition_str = f->GetData().GetStr();
3224  RemovePeriodFromEnd(definition_str, true);
     // the definition is only used when it differs from the region name
     // (case-insensitive, disregarding a final period)
3225  if( ! s_StrEqualDisregardFinalPeriod(definition_str, region, NStr::eNocase) ) {
3227  new CFlatStringQVal(definition_str));
3228  found = true;
3229  }
     // scanning stops at the first cddScoreData object that has a
     // "definition" field, whether or not it was used
3230  break;
3231 
3232  /**
3233  if (ctx.IsProt()) {
3234  if (f->GetData().GetStr() != region || added_raw) {
3235  x_AddQual(eFQ_region,
3236  new CFlatStringQVal(f->GetData().GetStr()));
3237  }
3238  } else {
3239  x_AddQual(eFQ_region,
3240  new CFlatStringQVal(f->GetData().GetStr()));
3241  }
3242 
3243  found = true;
3244  break;
3245  **/
3246 
3247  /**
3248  if (ctx.IsProt() && region == f->GetData().GetStr()) {
3249  /// skip
3250  } else {
3251  x_AddQual(eFQ_region,
3252  new CFlatStringQVal(f->GetData().GetStr()));
3253  found = true;
3254  break;
3255  }
3256  **/
3257  }
3258  }
3259 
     // NOTE(review): every path that sets `found` also breaks out of the loop
     // above, so this check appears to be unreachable with found == true
3260  if (found) {
3261  break;
3262  }
3263  }
3264 }
3265 
3266 
3267 // ----------------------------------------------------------------------------
     // Adds the bond qualifier for a bond feature (asserted via IsBond()).
     //   ctx - bioseq context (format and IsProt() are consulted)
     // A blank bond name is a no-op. In a protein context under GenBank, GBSeq
     // or INSDSeq format the name goes to eFQ_bond_type as a plain string;
     // otherwise it goes to eFQ_bond as a CFlatBondQVal.
     // NOTE(review): the signature line (listing line 3268) is missing from this
     // extract.
3269  CBioseqContext& ctx )
3270 // ----------------------------------------------------------------------------
3271 {
3272  _ASSERT( m_Feat.GetData().IsBond() );
3273 
3274  const CSeqFeatData& data = m_Feat.GetData();
3275  const string& bond = s_GetBondName( data.GetBond() );
3276  if ( NStr::IsBlank( bond ) ) {
3277  return;
3278  }
3279 
3280  if ( ( ctx.IsGenbankFormat() || ctx.Config().IsFormatGBSeq() || ctx.Config().IsFormatINSDSeq() ) && ctx.IsProt() ) {
3281  x_AddQual( eFQ_bond_type, new CFlatStringQVal( bond ) );
3282  } else {
3283  x_AddQual( eFQ_bond, new CFlatBondQVal( bond ) );
3284  }
3285 }
3286 
3287 // ----------------------------------------------------------------------------
     // Adds the eFQ_sec_str_type qualifier for a protein secondary-structure
     // feature, using the enum name of the feature's Psec_str value.
     //   ctx - bioseq context (not used in the visible lines)
     // NOTE(review): listing lines 3288 (signature) and 3292 (presumably an
     // _ASSERT on the feature type) are missing from this extract.
3289  CBioseqContext& ctx )
3290 // ----------------------------------------------------------------------------
3291 {
3293 
3294  const CSeqFeatData& data = m_Feat.GetData();
3295 
3296  CSeqFeatData_Base::TPsec_str sec_str_type = data.GetPsec_str();
3297 
     // translate the enum value into its registered name
3298  string sec_str_as_str = CSeqFeatData_Base::ENUM_METHOD_NAME(EPsec_str)()->FindName(sec_str_type, true);
3299  x_AddQual( eFQ_sec_str_type, new CFlatStringQVal( sec_str_as_str ) );
3300 }
3301 
3302 // ----------------------------------------------------------------------------
     // Adds the eFQ_non_std_residue qualifier from the string `n_s_res`.
     //   ctx - bioseq context (not used in the visible lines)
     // NOTE(review): listing lines 3303 (signature), 3307 (presumably an _ASSERT
     // on the feature type) and 3311 (the declaration of `n_s_res`) are missing
     // from this extract, so the origin of `n_s_res` is not visible here.
3304  CBioseqContext& ctx )
3305 // ----------------------------------------------------------------------------
3306 {
3308 
3309  const CSeqFeatData& data = m_Feat.GetData();
3310 
3312 
3313  x_AddQual( eFQ_non_std_residue, new CFlatStringQVal( n_s_res ) );
3314 }
3315 
3316 // ----------------------------------------------------------------------------
     // Adds the eFQ_heterogen qualifier for a het feature (asserted via
     // IsHet()), using the string held by the feature's Het value.
     //   ctx - bioseq context (not used in this body)
     // NOTE(review): the signature line (listing line 3317) is missing from this
     // extract.
3318  CBioseqContext& ctx )
3319 // ----------------------------------------------------------------------------
3320 {
3321  _ASSERT( m_Feat.GetData().IsHet() );
3322 
3323  const CSeqFeatData& data = m_Feat.GetData();
3324 
3325  CSeqFeatData_Base::THet het = data.GetHet();
3326 
3327  x_AddQual( eFQ_heterogen, new CFlatStringQVal( het.Get() ) );
3328 }
3329 
3330 // ----------------------------------------------------------------------------
     // Adds /db_xref and /replace qualifiers for a variation feature.
     //   ctx - bioseq context (not used in the visible lines)
     // NOTE(review): listing lines 3331 (signature), 3335 (presumably an _ASSERT
     // on the feature type), 3338 (the declaration of `variation`) and 3368 (the
     // last argument of CSeqportUtil::Convert, presumably the target coding) are
     // missing from this extract.
3332  CBioseqContext& ctx )
3333 // ----------------------------------------------------------------------------
3334 {
3336 
3337  const CSeqFeatData& data = m_Feat.GetData();
3339 
3340  // Make the /db_xref qual
3341  if( variation.CanGetId() ) {
3342  const CVariation_ref_Base::TId& dbt = variation.GetId();
3343  // the id tag is quite specific (e.g. db must be "dbSNP", etc.) or it won't print
3344  if ( dbt.IsSetDb() && !dbt.GetDb().empty() &&
3345  dbt.IsSetTag() && dbt.GetTag().IsStr() ) {
3346  const string &oid_str = dbt.GetTag().GetStr();
     // the leading "rs" of the tag is stripped from the emitted value
3347  if( dbt.GetDb() == "dbSNP" && NStr::StartsWith(oid_str, "rs" ) ) {
3348  x_AddQual(eFQ_db_xref, new CFlatStringQVal( dbt.GetDb() + ":" + oid_str.substr( 2 ) ) );
3349  }
3350  }
3351  }
3352 
3353  // Make the /replace quals:
3354  if( variation.CanGetData() && variation.GetData().IsInstance() &&
3355  variation.GetData().GetInstance().CanGetDelta() ) {
3356  const CVariation_inst_Base::TDelta& delta = variation.GetData().GetInstance().GetDelta();
     // one /replace qualifier per delta item that carries literal sequence data
3357  ITERATE( CVariation_inst_Base::TDelta, delta_iter, delta ) {
3358  if( *delta_iter && (*delta_iter)->CanGetSeq() ) {
3359  const CDelta_item_Base::TSeq& seq = (*delta_iter)->GetSeq();
3360  if( seq.IsLiteral() && seq.GetLiteral().CanGetSeq_data() ) {
3361  const CDelta_item_Base::C_Seq::TLiteral& seq_literal = seq.GetLiteral();
3362  const CSeq_literal_Base::TSeq_data& seq_data = seq_literal.GetSeq_data();
3363 
3364  // convert the data to the standard a,c,g,t
3365  CSeq_data iupacna_seq_data;
3366  CSeqportUtil::Convert( seq_data,
3367  &iupacna_seq_data,
3369  string nucleotides = iupacna_seq_data.GetIupacna().Get();
3370 
3371  // if the specified length and the length of the data conflict,
3372  // use the smaller
3373  const string::size_type max_len_allowed = seq_literal.GetLength();
3374  if( nucleotides.size() > max_len_allowed ) {
3375  nucleotides.resize( max_len_allowed );
3376  }
3377 
3378  NStr::ToLower( nucleotides );
3379 
     // blank results are not emitted
3380  if (!NStr::IsBlank(nucleotides)) {
3381  x_AddQual(eFQ_replace, new CFlatStringQVal(nucleotides));
3382  }
3383  }
3384  }
3385  }
3386  }
3387 }
3388 
     // Maps a site value to its display name. Three values are overridden with
     // the fixed strings below; every other value uses the name registered for
     // the ESite enum.
     // NOTE(review): the signature (listing line 3389) and the three case labels
     // (listing lines 3396, 3398, 3400) are missing from this extract, so which
     // site values receive the overrides is not visible here; presumably this
     // is s_GetSiteName -- confirm.
3390 {
3391  static const string kOther = "other";
3392  static const string kDnaBinding = "DNA binding";
3393  static const string kInhibit = "inhibition";
3394 
3395  switch (site) {
3397  return kOther;
3399  return kDnaBinding;
3401  return kInhibit;
3402 
3403  default:
3404  return CSeqFeatData::ENUM_METHOD_NAME(ESite)()->FindName(site, true);
3405  }
3406 }
3407 
3408 // ----------------------------------------------------------------------------
     // Adds the site qualifier for a site feature (asserted via IsSite()).
     //   ctx - bioseq context (format and IsProt() are consulted)
     // In a protein context under GenBank, GBSeq or INSDSeq format the site name
     // goes to eFQ_site_type. Otherwise it goes to eFQ_site, but only when the
     // feature has no comment or the comment does not already contain the name.
     // NOTE(review): the signature line (listing line 3409) is missing from this
     // extract.
3410  CBioseqContext& ctx )
3411 // ----------------------------------------------------------------------------
3412 {
3413  _ASSERT( m_Feat.GetData().IsSite() );
3414 
3415  const CSeqFeatData& data = m_Feat.GetData();
3416  CSeqFeatData::TSite site = data.GetSite();
3417  const string& site_name = s_GetSiteName( site );
3418 
3419  // ID-4627 : site_type qualifier is needed for GBSeq/INSDSeq XML too
3420  if ( (ctx.Config().IsFormatGenbank() ||
3421  ctx.Config().IsFormatGBSeq() ||
3422  ctx.Config().IsFormatINSDSeq()) && ctx.IsProt() ) {
3423  x_AddQual(eFQ_site_type, new CFlatSiteQVal( site_name ) );
3424  } else {
3425  if ( !m_Feat.IsSetComment() ||
3426  ( NStr::Find( m_Feat.GetComment(), site_name ) == NPOS ) ) {
3427  x_AddQual( eFQ_site, new CFlatSiteQVal( site_name ) );
3428  }
3429  }
3430 }
3431 
3432 // ----------------------------------------------------------------------------
     // Handles one user field of a feature extension, dispatching on the field's
     // string label.
     //   field - user field to inspect; fields without a string label are ignored
     //   ext   - the enclosing extension object
     // "ModelEvidence": returns early if the feature already carries an
     // "experiment" GBQual. "Process", "Component" and "Function" are passed to
     // x_AddGoQuals(field).
     // NOTE(review): listing lines 3433 (signature) and 3446 (the statement that
     // handles ModelEvidence, presumably the only use of `ext`) are missing from
     // this extract.
3434  const CUser_field& field, const CSeq_feat::TExt& ext )
3435 // ----------------------------------------------------------------------------
3436 {
3437  if ( field.IsSetLabel() && field.GetLabel().IsStr() ) {
3438  const string& oid = field.GetLabel().GetStr();
3439  if ( oid == "ModelEvidence" ) {
3440  FOR_EACH_GBQUAL_ON_SEQFEAT (gbq_itr, m_Feat) {
3441  const CGb_qual& gbq = **gbq_itr;
3442  if (gbq.IsSetQual()) {
3443  if (NStr::Equal (gbq.GetQual(), "experiment")) return;
3444  }
3445  }
3447  } else if ( oid == "Process" || oid == "Component" || oid == "Function" ) {
3448  x_AddGoQuals(field);
3449  }
3450  }
3451 }
3452 
3453 // ----------------------------------------------------------------------------
     // Handles a feature extension object: first recurses into nested data, then
     // dispatches on the object's own string type.
     //   ext - extension (user object) to process
     // Nested single objects and object lists are processed recursively; nested
     // field lists go through the (field, ext) overload. Afterwards,
     // "ModelEvidence" returns early if the feature already carries an
     // "experiment" GBQual, and "GeneOntology" is passed to x_AddGoQuals(ext).
     // NOTE(review): listing lines 3454 (signature), 3467 and 3471 (the loop
     // headers that declare `o`) and 3486 (the statement that handles
     // ModelEvidence) are missing from this extract.
3455  const CSeq_feat::TExt& ext )
3456 // ----------------------------------------------------------------------------
3457 {
3458  ITERATE (CUser_object::TData, it, ext.GetData()) {
3459  const CUser_field& field = **it;
3460  if ( !field.IsSetData() ) {
3461  continue;
3462  }
3463  if ( field.GetData().IsObject() ) {
3464  const CUser_object& obj = field.GetData().GetObject();
3465  x_AddQualsExt(obj);
3466  } else if ( field.GetData().IsObjects() ) {
3468  x_AddQualsExt(**o);
3469  }
3470  } else if ( field.GetData().IsFields() ) {
3472  // x_AddGoQuals(**o);
3473  x_AddQualsExt(**o, ext);
3474  }
3475  }
3476  }
3477  if ( ext.IsSetType() && ext.GetType().IsStr() ) {
3478  const string& oid = ext.GetType().GetStr();
3479  if ( oid == "ModelEvidence" ) {
3480  FOR_EACH_GBQUAL_ON_SEQFEAT (gbq_itr, m_Feat) {
3481  const CGb_qual& gbq = **gbq_itr;
3482  if (gbq.IsSetQual()) {
3483  if (NStr::Equal (gbq.GetQual(), "experiment")) return;
3484  }
3485  }
3487  } else if ( oid == "GeneOntology" ) {
3488  x_AddGoQuals(ext);
3489  }
3490  }
3491 }
3492 
3493 // ----------------------------------------------------------------------------
     // Adds eFQ_db_xref qualifiers.
     //   ctx - bioseq context (IsProt() and the scope are used)
     // For a non-coding-region feature with a product, in a protein context and
     // not mapped from a protein, each GI on the product bioseq becomes a
     // db_xref. The feature's own Dbxref is handled afterwards when set.
     // NOTE(review): listing lines 3494 (signature) and 3520 (the statement that
     // handles m_Feat's Dbxref) are missing from this extract.
3495  CBioseqContext& ctx )
3496 // ----------------------------------------------------------------------------
3497 {
3498  if ( m_Feat.IsSetProduct() &&
3499  ( !m_Feat.GetData().IsCdregion() && ctx.IsProt() && ! IsMappedFromProt() ) ) {
3500  CBioseq_Handle prod =
3501  ctx.GetScope().GetBioseqHandle( m_Feat.GetProductId() );
3502  if ( prod ) {
3503  const CBioseq_Handle::TId& ids = prod.GetId();
3504  if ( ! ids.empty() ) {
3505  ITERATE (CBioseq_Handle::TId, it, ids) {
     // only GI ids are considered
3506  if ( it->Which() != CSeq_id::e_Gi ) {
3507  continue;
3508  }
3509  CConstRef<CSeq_id> id = it->GetSeqId();
     // NOTE(review): after the e_Gi filter above, the IsGeneral() test and the
     // id->IsGi() argument both look redundant (always true) -- confirm.
3510  if (!id->IsGeneral()) {
3511  x_AddQual(eFQ_db_xref, new CFlatSeqIdQVal(*id, id->IsGi()));
3512  }
3513  }
3514  }
3515  }
3516  }
3517  if ( ! m_Feat.IsSetDbxref() ) {
3518  return ;
3519  }
3521 }
3522 
3523 // ----------------------------------------------------------------------------
3525  const CUser_field& field )
3526 // ----------------------------------------------------------------------------
3527 {
3528  if ( field.IsSetLabel() && field.GetLabel().IsStr() ) {
3529  const string& label = field.GetLabel().GetStr();
3530  EFeatureQualifier slot = eFQ_none;
3531  if ( label == "Process" ) {
3532  slot = eFQ_go_process;
3533  } else if ( label == "Component" ) {
3534  slot = eFQ_go_component;
3535  } else if ( label == "Function" ) {
3536  slot = eFQ_go_function;
3537  }
3538  if ( slot == eFQ_none ) {
3539  return;
3540  }
3541 
3543  if ( (*it)->GetData().IsFields() ) {
3544  CRef<CFlatGoQVal> go_val( new CFlatGoQVal(**it) );
3545 
3546  bool okay_to_add = true;
3547 
3548  // check for dups
3549  CFeatureItem::TQCI iter = x_GetQual(slot);
3550  for ( ; iter != m_Quals.end() && iter->first == slot; ++iter) {
3551  const CFlatGoQVal & qual = dynamic_cast<const CFlatGoQVal &>( *iter->second );
3552  if( qual.Equals(*go_val) )
3553  {
3554  okay_to_add = false;
3555  break;
3556  }
3557  }
3558 
3559  if( okay_to_add ) {
3560  x_AddQual(slot, go_val);
3561  }
3562  }
3563  }
3564  }
3565 }
3566 
3567 // ----------------------------------------------------------------------------
3569  const CUser_object& uo )
3570 // ----------------------------------------------------------------------------
3571 {
3572  ITERATE (CUser_object::TData, uf_it, uo.GetData()) {
3573  const CUser_field& field = **uf_it;
3574  if ( field.IsSetLabel() && field.GetLabel().IsStr() ) {
3575  const string& label = field.GetLabel().GetStr();
3576  EFeatureQualifier slot = eFQ_none;
3577  if ( label == "Process" ) {
3578  slot = eFQ_go_process;
3579  } else if ( label == "Component" ) {
3580  slot = eFQ_go_component;
3581  } else if ( label == "Function" ) {
3582  slot = eFQ_go_function;
3583  }
3584  if ( slot == eFQ_none ) {
3585  continue;
3586  }
3587 
3589  if ( (*it)->GetData().IsFields() ) {
3590  CRef<CFlatGoQVal> go_val( new CFlatGoQVal(**it) );
3591 
3592  bool okay_to_add = true;
3593 
3594  // check for dups
3595  CFeatureItem::TQCI iter = x_GetQual(slot);
3596  for ( ; iter != m_Quals.end() && iter->first == slot; ++iter) {
3597  const CFlatGoQVal & qual = dynamic_cast<const CFlatGoQVal &>( *iter->second );
3598  if( qual.Equals(*go_val) )
3599  {
3600  okay_to_add = false;
3601  break;
3602  }
3603  }
3604 
3605  if( okay_to_add ) {
3606  x_AddQual(slot, go_val);
3607  }
3608  }
3609  }
3610  }
3611  }
3612 }
3613 
3614 // ----------------------------------------------------------------------------
3616  const CBioseqContext& ctx,
3617  const CGene_ref* gene_ref,
3618  CConstRef<CSeq_feat>& gene_feat,
3619  bool from_overlap )
3620 // ----------------------------------------------------------------------------
3621 {
3622  const CSeqFeatData& data = m_Feat.GetData();
3623  CSeqFeatData::ESubtype subtype = data.GetSubtype();
3624 
3625  if ( m_Feat.GetData().Which() == CSeqFeatData::e_Gene ) {
3626  gene_ref = &( m_Feat.GetData().GetGene() );
3627  }
3628  if ( ! gene_ref && gene_feat ) {
3629  gene_ref = & gene_feat->GetData().GetGene();
3630  }
3631 
3632  if ( ! gene_ref || gene_ref->IsSuppressed() ) {
3633  return;
3634  }
3635 
3636  const bool is_gene = (subtype == CSeqFeatData::eSubtype_gene);
3637 
3638  const bool okay_to_propage = (subtype != CSeqFeatData::eSubtype_mobile_element &&
3639  subtype != CSeqFeatData::eSubtype_centromere &&
3640  subtype != CSeqFeatData::eSubtype_telomere);
3641 
3642  const string* locus = (gene_ref->IsSetLocus() && !NStr::IsBlank(gene_ref->GetLocus())) ?
3643  &gene_ref->GetLocus() : nullptr;
3644  const string* desc = (gene_ref->IsSetDesc() && !NStr::IsBlank(gene_ref->GetDesc())) ?
3645  &gene_ref->GetDesc() : nullptr;
3646  const TGeneSyn* syn = (gene_ref->IsSetSyn() && !gene_ref->GetSyn().empty()) ?
3647  &gene_ref->GetSyn() : nullptr;
3648  const string* locus_tag =
3649  (gene_ref->IsSetLocus_tag() && !NStr::IsBlank(gene_ref->GetLocus_tag())) ?
3650  &gene_ref->GetLocus_tag() : nullptr;
3651 
3652  if ( ctx.IsProt() ) {
3653  // skip if GenPept format and not gene or CDS
3654  if (subtype != CSeqFeatData::eSubtype_gene && subtype != CSeqFeatData::eSubtype_cdregion) {
3655  return;
3656  }
3657  }
3658 
3659  // gene:
3660  if ( !from_overlap || okay_to_propage ) {
3661  if (locus) {
3662  m_Gene = *locus;
3663  }
3664  else if (desc && okay_to_propage) {
3665  m_Gene = *desc;
3666  }
3667  else if (syn) {
3668  CGene_ref::TSyn syns = *syn;
3669  m_Gene = syns.front();
3670  }
3671  if( !m_Gene.empty() ) {
3672  // we suppress the /gene qual when there's no locus but there is a locus tag (imitates C toolkit)
3673  if (locus || ! locus_tag) {
3675  }
3676  }
3677  }
3678 
3679  // locus tag:
3680  if ( gene_ref || okay_to_propage ) {
3681  if (locus) {
3682  if (locus_tag) {
3684  }
3685  }
3686  else if (locus_tag) {
3688  }
3689  }
3690 
3691  // gene desc:
3692  if ( gene_ref || okay_to_propage ) {
3693  if (locus) {
3694  if (is_gene && desc) {
3695  string desc_cleaned = *desc;
3696  RemovePeriodFromEnd( desc_cleaned, true );
3697  x_AddQual(eFQ_gene_desc, new CFlatStringQVal(desc_cleaned));
3698  }
3699  }
3700  else if (locus_tag) {
3701  if (is_gene && desc) {
3703  }
3704  }
3705  }
3706 
3707  // gene syn:
3708  if ( gene_ref || okay_to_propage ) {
3709  if (locus) {
3710  if (syn) {
3712  }
3713  } else if (locus_tag) {
3714  if (syn) {
3716  }
3717  } else if (desc) {
3718  if (syn) {
3720  }
3721  } else if (syn) {
3722  CGene_ref::TSyn syns = *syn;
3723  syns.pop_front();
3724  // ... and the rest as synonyms
3725  if (syn) {
3727  }
3728  }
3729  }
3730 
3731  // gene nomenclature
3732  if( gene_ref->IsSetFormal_name() && subtype == CSeqFeatData::eSubtype_gene ) {
3734  }
3735 
3736  // gene allele:
3737  {{
3738  // these bool vars just break up the if-statement to make it easier to understand
3739  const bool is_type_where_allele_from_gene_forbidden = (subtype == CSeqFeatData::eSubtype_variation);
3740  const bool is_type_where_allele_from_gene_forbidden_except_with_embl_or_ddbj =
3742  subtype == CSeqFeatData::eSubtype_centromere ||
3743  subtype == CSeqFeatData::eSubtype_telomere );
3744  const bool is_embl_or_ddbj = ( GetContext()->IsEMBL() || GetContext()->IsDDBJ() );
3745  if ( ! is_type_where_allele_from_gene_forbidden &&
3746  ( is_embl_or_ddbj || ! is_type_where_allele_from_gene_forbidden_except_with_embl_or_ddbj ) )
3747  {
3748  if (gene_ref->IsSetAllele() && !NStr::IsBlank(gene_ref->GetAllele())) {
3751  }
3752  }
3753  }}
3754 
3755  // gene xref:
3756  if (gene_ref->IsSetDb()) {
3757  x_AddQual(eFQ_gene_xref, new CFlatXrefQVal(gene_ref->GetDb()));
3758  }
3759 
3760  // gene db-xref:
3761  switch (m_Feat.GetData().Which()) {
3762  case CSeqFeatData::e_Rna:
3764  if (gene_feat && gene_feat->IsSetDbxref()) {
3765  CSeq_feat::TDbxref xrefs = gene_feat->GetDbxref();
3766  if (m_Feat.IsSetDbxref()) {
3768  for (CSeq_feat::TDbxref::iterator i = xrefs.begin();
3769  i != xrefs.end(); ++i) {
3770  if ((*i)->Equals(**it)) {
3771  xrefs.erase(i);
3772  break;
3773  }
3774  }
3775  }
3776  }
3777  if (xrefs.size()) {
3778  x_AddQual(eFQ_db_xref, new CFlatXrefQVal(xrefs));
3779  }
3780  }
3781  break;
3782 
3783  default:
3784  break;
3785  }
3786 
3787  // gene map:
3788  if (!from_overlap && gene_ref->IsSetMaploc() && subtype == CSeqFeatData::eSubtype_gene) {
3789  x_AddQual(eFQ_gene_map, new CFlatStringQVal(gene_ref->GetMaploc()));
3790  }
3791 
3792  // gene pseudogene qual:
3793 
3794  // inherit pseudogene, if possible
3795  if( gene_feat && ! x_HasQual(eFQ_pseudogene) ) {
3796  const string & strPseudoGene = gene_feat->GetNamedQual("pseudogene");
3797  x_AddQual(eFQ_pseudogene, new CFlatStringQVal(strPseudoGene) );
3798  }
3799 }
3800 
3801 // ----------------------------------------------------------------------------
3804  bool pseudo)
3805 // ----------------------------------------------------------------------------
3806 {
3807  _ASSERT( m_Feat.GetData().IsProt() );
3808 
3809  const CSeqFeatData& data = m_Feat.GetData();
3810  const CProt_ref& pref = data.GetProt();
3811  CProt_ref::TProcessed processed = pref.GetProcessed();
3812 
3813  //cerr << MSerial_AsnText << m_Feat.GetOriginalFeature();
3814 
3815  if ( ctx.IsNuc() || (ctx.IsProt() && !IsMappedFromProt()) ) {
3816  if ( pref.IsSetName() && !pref.GetName().empty() ) {
3817  const CProt_ref::TName& names = pref.GetName();
3818  x_AddQual(eFQ_product, new CFlatStringQVal(names.front()));
3819  if (names.size() > 1) {
3821  }
3822  }
3823  if ( pref.IsSetDesc() && !pref.GetDesc().empty() ) {
3824  if ( !ctx.IsProt() ) {
3825  string desc = pref.GetDesc();
3826  TrimSpacesAndJunkFromEnds(desc, true);
3827  bool add_period = RemovePeriodFromEnd(desc, true);
3828  CRef<CFlatStringQVal> prot_desc(new CFlatStringQVal(desc));
3829  if (add_period) {
3830  prot_desc->SetAddPeriod();
3831  }
3832  x_AddQual(eFQ_prot_desc, prot_desc);
3833 // had_prot_desc = true;
3834  } else {
3836  }
3837  }
3838  if ( pref.IsSetActivity() && !pref.GetActivity().empty() ) {
3839  ITERATE (CProt_ref::TActivity, it, pref.GetActivity()) {
3840  if (!NStr::IsBlank(*it)) {
3842  }
3843  }
3844  }
3845  if (pref.IsSetEc() && !pref.GetEc().empty()) {
3846  ITERATE(CProt_ref::TEc, ec, pref.GetEc()) {
3847  if ( !ctx.Config().DropIllegalQuals() || s_IsLegalECNumber(*ec)) {
3849  }
3850  }
3851  }
3852  if ( m_Feat.IsSetProduct() ) {
3854  ctx.GetScope().GetBioseqHandle( m_Feat.GetProductId() );
3855  if ( prot ) {
3857  } else {
3858  try {
3859  const CSeq_id& prod_id =
3860  GetId( m_Feat.GetProduct(), &ctx.GetScope());
3861  if ( ctx.IsRefSeq() || !ctx.Config().ForGBRelease() ) {
3862  x_AddQual(eFQ_protein_id, new CFlatSeqIdQVal(prod_id));
3863  }
3864  } catch (CObjmgrUtilException&) {}
3865  }
3866  }
3867  } else { // protein feature on subpeptide bioseq
3869  }
3870  if ( !pseudo && ( ctx.Config().ShowPeptides() || ctx.Config().IsFormatGBSeq() || ctx.Config().IsFormatINSDSeq() ) ) {
3871  if ( processed == CProt_ref::eProcessed_mature ||
3872  processed == CProt_ref::eProcessed_signal_peptide ||
3874  processed == CProt_ref::eProcessed_propeptide ) {
3875  CSeqVector pep(m_Feat.GetLocation(), ctx.GetScope());
3877  string peptide;
3878  pep.GetSeqData(pep.begin(), pep.end(), peptide);
3879  if (!NStr::IsBlank(peptide)) {
3880  x_AddQual(eFQ_peptide, new CFlatStringQVal(peptide));
3881  }
3882  }
3883  }
3884 
3885  ///
3886  /// report molecular weights
3887  ///
3888  if (ctx.IsProt() && ( ctx.IsRefSeq() || ctx.Config().IsFormatGBSeq() || ctx.Config().IsFormatINSDSeq() ) && ! IsMappedFromProt() &&
3889  ! ( m_Feat.IsSetPartial() && m_Feat.GetPartial() ) &&
3892  ! pseudo )
3893  {
3894  double wt = 0;
3895  bool has_mat_peptide = false;
3896  bool has_propeptide = false;
3897  bool has_signal_peptide = false;
3898 
3900 
3901  const bool is_pept_whole_loc = loc->IsWhole() ||
3902  ( loc->GetStart(eExtreme_Biological) == 0 &&
3903  loc->GetStop(eExtreme_Biological) == (ctx.GetHandle().GetBioseqLength() - 1) );
3904 
3905  if (processed == CProt_ref::eProcessed_not_set ||
3906  processed == CProt_ref::eProcessed_preprotein )
3907  {
3908  SAnnotSelector sel = ctx.SetAnnotSelector();
3910  for (CFeat_CI feat_it(ctx.GetHandle(), sel); feat_it; ++feat_it) {
3911  bool copy_loc = false;
3912  switch (feat_it->GetData().GetProt().GetProcessed()) {
3915  {{
3916  has_signal_peptide = true;
3917  if ( (feat_it->GetLocation().GetTotalRange().GetFrom() ==
3919  ! feat_it->GetLocation().Equals( m_Feat.GetLocation() ) ) {
3920  loc = loc->Subtract(feat_it->GetLocation(),
3922  nullptr, nullptr);
3923  }
3924  }}
3925  break;
3926 
3928  has_mat_peptide = true;
3929  break;
3930 
3932  has_propeptide = true;
3933  break;
3934 
3935  default:
3936  break;
3937  }
3938 
3939  if (copy_loc) {
3940  /// we need to adjust our location to the end of the signal
3941  /// peptide
3942  CRef<CSeq_loc> l(new CSeq_loc);
3943  loc = l;
3944  l->Assign(m_Feat.GetLocation());
3945  l->SetInt().SetTo
3946  (feat_it->GetLocation().GetTotalRange().GetTo());
3947  }
3948  }
3949  }
3950 
3951  /**
3952  CMolInfo::TCompleteness comp = CMolInfo::eCompleteness_partial;
3953  {{
3954  CConstRef<CMolInfo> molinfo
3955  (sequence::GetMolInfo(ctx.GetHandle()));
3956  if (molinfo) {
3957  comp = molinfo->GetCompleteness();
3958  }
3959  }}
3960  **/
3961 
3963 
3964  bool proteinIsAtLeastMature;
3965  switch( pref.GetProcessed() ) {
3968  proteinIsAtLeastMature = false;
3969  break;
3970  default:
3971  proteinIsAtLeastMature = true;
3972  break;
3973  }
3974 
3975  if ( (!has_mat_peptide || !has_signal_peptide || !has_propeptide) || (proteinIsAtLeastMature) || (!is_pept_whole_loc) ) {
3976  try {
3977  const TGetProteinWeight flags = 0;
3979  ctx.GetScope(), loc, flags);
3980  }
3981  catch (CException&) {
3982  }
3983  }
3984  }
3985 
3986  /// note: we report the weight rounded to the nearest int
3987  if (wt) {
3989  new CFlatIntQVal((int(wt + 0.5))));
3990  }
3991  }
3992 
3993  // cleanup
3994  if ( processed == CProt_ref::eProcessed_signal_peptide ||
3995  processed == CProt_ref::eProcessed_transit_peptide ) {
3996  if ( !ctx.IsRefSeq() ) {
3997  // Only RefSeq allows product on signal or transit peptide
3999  }
4000  }
4001  if ( processed == CProt_ref::eProcessed_preprotein &&
4002  !ctx.IsRefSeq() && !ctx.IsProt() &&
4004  const CFlatStringQVal* product = x_GetStringQual(eFQ_product);
4005  if (product) {
4006  x_AddQual(eFQ_encodes, new CFlatStringQVal("encodes " + product->GetValue()));
4008  }
4009  }
4010 }
4011 
4012 
4013 static void s_ParseParentQual(const CGb_qual& gbqual, list<string>& vals)
4014 {
4015  vals.clear();
4016 
4017  if (!gbqual.IsSetVal() || NStr::IsBlank(gbqual.GetVal())) {
4018  return;
4019  }
4020 
4021  const string& val = gbqual.GetVal();
4022 
4023  if (val.length() > 1 && NStr::StartsWith(val, '(') &&
4024  NStr::EndsWith(val, ')') && val.find(',') != NPOS) {
4025  NStr::Split(val, "(,)", vals, NStr::fSplit_Tokenize);
4026  } else {
4027  vals.push_back(val);
4028  }
4029 
4030  list<string>::iterator it = vals.begin();
4031  while (it != vals.end()) {
4032  if (NStr::IsBlank(*it)) {
4033  it = vals.erase(it);
4034  } else {
4035  ConvertQuotes(*it);
4036  ExpandTildes(*it, eTilde_space);
4037  ++it;
4038  }
4039  }
4040 }
4041 
4042 
4044  const char* m_Name;
4046 
4047  operator string(void) const { return m_Name; }
4048 };
4049 
4050 
4051 static bool s_IsValidDirection(const string& direction) {
4052  return NStr::EqualNocase(direction, "LEFT") ||
4053  NStr::EqualNocase(direction, "RIGHT") ||
4054  NStr::EqualNocase(direction, "BOTH");
4055 }
4056 
4057 
4058 static bool s_IsValidnConsSplice(const string& cons_splice) {
4059  return NStr::EqualNocase(cons_splice, "(5'site:YES, 3'site:YES)") ||
4060  NStr::EqualNocase(cons_splice, "(5'site:YES, 3'site:NO)") ||
4061  NStr::EqualNocase(cons_splice, "(5'site:YES, 3'site:ABSENT)") ||
4062  NStr::EqualNocase(cons_splice, "(5'site:NO, 3'site:YES)") ||
4063  NStr::EqualNocase(cons_splice, "(5'site:NO, 3'site:NO)") ||
4064  NStr::EqualNocase(cons_splice, "(5'site:NO, 3'site:ABSENT)") ||
4065  NStr::EqualNocase(cons_splice, "(5'site:ABSENT, 3'site:YES)") ||
4066  NStr::EqualNocase(cons_splice, "(5'site:ABSENT, 3'site:NO)") ||
4067  NStr::EqualNocase(cons_splice, "(5'site:ABSENT, 3'site:ABSENT)");
4068 }
4069 
4070 // currently just converts PMIDs into links
4071 static void
4072 s_HTMLizeExperimentQual( string &out_new_val, const string &val)
4073 {
4074  static const string kPmid("PMID:");
4075 
4076  // just to make sure
4077  out_new_val.clear();
4078 
4079  // str_pos should generally be considered as holding the first position
4080  // in val that we have not yet processed and copied to out_new_val.
4081  SIZE_TYPE str_pos = 0;
4082  while( str_pos < val.length() ) {
4083 
4084  // find next "PMID:" to process
4085  const SIZE_TYPE pmid_label_pos = val.find( "PMID:", str_pos );
4086  if( pmid_label_pos == NPOS ) {
4087  // no more PMIDs left.
4088  // copy the rest of the string and let's leave
4089  copy( val.begin() + str_pos, val.end(), back_inserter(out_new_val) );
4090  return;
4091  }
4092 
4093  // copy val up to just after "PMID:"
4094  const SIZE_TYPE first_pmid_pos = pmid_label_pos + kPmid.length();
4095  copy( val.begin() + str_pos, val.begin() + first_pmid_pos, back_inserter(out_new_val) );
4096  str_pos = first_pmid_pos;
4097 
4098  // push pmids (with links) onto the output
4099  // we consider the pmids to be numbers separated by one or more spaces and/or commas.
4100  bool first_num = true;
4101  while( str_pos < val.length() ) {
4102  // skip spaces and commas before pmid
4103  const SIZE_TYPE next_pmid_pos = val.find_first_not_of(" ,", str_pos);
4104  if( next_pmid_pos == NPOS || ! isdigit(val[next_pmid_pos]) ) {
4105  break;
4106  }
4107 
4108  // find end of pmid
4109  SIZE_TYPE end_of_pmid_pos = val.find_first_not_of("0123456789", next_pmid_pos );
4110  if( NPOS == end_of_pmid_pos ) {
4111  end_of_pmid_pos = val.length();
4112  }
4113 
4114  // extract the actual pmid
4115  string pmid = val.substr(next_pmid_pos, end_of_pmid_pos - next_pmid_pos );
4116 
4117  // write pmid with link
4118  if( ! first_num ) {
4119  out_new_val += ',';
4120  }
4121  out_new_val += "<a href=\"";
4122  out_new_val += strLinkBasePubmed;
4123  out_new_val += pmid;
4124  out_new_val += "\">";
4125  out_new_val += pmid;
4126  out_new_val += "</a>";
4127  str_pos = end_of_pmid_pos;
4128 
4129  first_num = false;
4130  }
4131  }
4132 }
4133 
4134 // ----------------------------------------------------------------------------
4136  CBioseqContext& ctx )
4137 // ----------------------------------------------------------------------------
4138 {
4140 
4141  typedef SStaticPair<const char*, EFeatureQualifier> TLegalImport;
4142  static const TLegalImport kLegalImports[] = {
4143  // Must be in case-insensitive alphabetical order!
4144 #define DO_IMPORT(x) { #x, eFQ_##x }
4145  DO_IMPORT(allele),
4146  DO_IMPORT(bound_moiety),
4147  DO_IMPORT(circular_RNA),
4148  DO_IMPORT(clone),
4149  DO_IMPORT(codon),
4150  DO_IMPORT(compare),
4151  DO_IMPORT(cons_splice),
4152  DO_IMPORT(cyt_map),
4153  DO_IMPORT(direction),
4154  DO_IMPORT(EC_number),
4155  DO_IMPORT(estimated_length),
4156  DO_IMPORT(evidence),
4157  DO_IMPORT(experiment),
4158  DO_IMPORT(frequency),
4159  DO_IMPORT(function),
4160  DO_IMPORT(gap_type),
4161  DO_IMPORT(gen_map),
4162  DO_IMPORT(inference),
4163  DO_IMPORT(insertion_seq),
4164  DO_IMPORT(label),
4165  DO_IMPORT(linkage_evidence),
4166  DO_IMPORT(map),
4167  DO_IMPORT(mobile_element),
4168  DO_IMPORT(mobile_element_type),
4169  DO_IMPORT(mod_base),
4170  DO_IMPORT(ncRNA_class),
4171  DO_IMPORT(number),
4172  DO_IMPORT(old_locus_tag),
4173  DO_IMPORT(operon),
4174  DO_IMPORT(organism),
4175  DO_IMPORT(PCR_conditions),
4176  DO_IMPORT(phenotype),
4177  DO_IMPORT(product),
4178  DO_IMPORT(pseudogene),
4179  DO_IMPORT(rad_map),
4180  DO_IMPORT(recombination_class),
4181  DO_IMPORT(regulatory_class),
4182  DO_IMPORT(replace),
4183  DO_IMPORT(ribosomal_slippage),
4184  DO_IMPORT(rpt_family),
4185  DO_IMPORT(rpt_type),
4186  DO_IMPORT(rpt_unit),
4187  DO_IMPORT(rpt_unit_range),
4188  DO_IMPORT(rpt_unit_seq),
4189  DO_IMPORT(satellite),
4190  DO_IMPORT(standard_name),
4191  DO_IMPORT(tag_peptide),
4192  DO_IMPORT(trans_splicing),
4193  DO_IMPORT(transposon),
4194  DO_IMPORT(UniProtKB_evidence),
4195  DO_IMPORT(usedin)
4196 #undef DO_IMPORT
4197  };
4199  DEFINE_STATIC_ARRAY_MAP(TLegalImportMap, kLegalImportMap, kLegalImports);
4200 
4201  bool check_qual_syntax = ctx.Config().CheckQualSyntax();
4202 
4203  const bool old_locus_tag_added_elsewhere = x_HasQual(eFQ_old_locus_tag);
4204 
4205  bool first_pseudogene = true;
4206 
4207  vector<string> replace_quals;
4208  const CSeq_feat_Base::TQual & qual = m_Feat.GetQual(); // must store reference since ITERATE macro evaluates 3rd arg multiple times
4209  ITERATE( CSeq_feat::TQual, it, qual ) {
4210  if (!(*it)->IsSetQual() || !(*it)->IsSetVal()) {
4211  continue;
4212  }
4213  const string& val = (*it)->GetVal();
4214 
4215  const char* name = (*it)->GetQual().c_str();
4216  const TLegalImportMap::const_iterator li = kLegalImportMap.find(name);
4218  if ( li != kLegalImportMap.end() ) {
4219  slot = li->second;
4220  } else if (check_qual_syntax) {
4221  continue;
4222  }
4223 
4224  // only certain slot types may have an empty value (e.g. M96433)
4225  switch(slot) {
4226  case eFQ_replace:
4227  case eFQ_pseudogene:
4228  // empty value allowed for these slot types, so we don't check
4229  break;
4230  default:
4231  // empty value forbidden for other slot types
4232  if( val.empty() ) {
4233  continue;
4234  }
4235  break;
4236  }
4237 
4238  switch (slot) {
4239  case eFQ_allele:
4240  // if /allele inherited from gene, suppress allele gbqual on feature
4241  if (x_HasQual(eFQ_gene_allele)) {
4242  continue;
4243  } else {
4244  x_AddQual(slot, new CFlatStringQVal(val,
4246  }
4247  break;
4248  case eFQ_codon:
4249  if ((*it)->IsSetVal() && !NStr::IsBlank(val)) {
4251  }
4252  break;
4253  case eFQ_cons_splice:
4254  if ((*it)->IsSetVal()) {
4255  if (!check_qual_syntax || s_IsValidnConsSplice(val)) {
4256  x_AddQual(slot, new CFlatStringQVal(val));
4257  }
4258  }
4259  break;
4260  case eFQ_direction:
4261  if ((*it)->IsSetVal()) {
4262  if (!check_qual_syntax || s_IsValidDirection(val)) {
4263  x_AddQual(slot, new CFlatNumberQVal(val));
4264  }
4265  }
4266  break;
4267  case eFQ_estimated_length:
4268  case eFQ_mod_base:
4269  case eFQ_number:
4270  if ((*it)->IsSetVal() && !NStr::IsBlank(val)) {
4271  x_AddQual(slot, new CFlatNumberQVal(val));
4272  }
4273  break;
4274  case eFQ_rpt_type:
4275  x_AddRptTypeQual(val, check_qual_syntax);
4276  break;
4277  case eFQ_rpt_unit:
4278  if ((*it)->IsSetVal()) {
4280  }
4281  break;
4282  case eFQ_usedin:
4283  {{
4284  list<string> vals;
4285  s_ParseParentQual(**it, vals);
4286  ITERATE (list<string>, i, vals) {
4288  }
4289  break;
4290  }}
4291  case eFQ_old_locus_tag:
4292  {{
4293  if( ! old_locus_tag_added_elsewhere ) {
4294  list<string> vals;
4295  s_ParseParentQual(**it, vals);
4296  ITERATE (list<string>, i, vals) {
4298  }
4299  }
4300  break;
4301  }}
4302  case eFQ_rpt_family:
4303  if ((*it)->IsSetVal() && !NStr::IsBlank(val)) {
4304  x_AddQual(slot, new CFlatStringQVal(val));
4305  }
4306  break;
4307  case eFQ_label:
4308  x_AddQual(slot, new CFlatLabelQVal(val));
4309  break;
4310  case eFQ_EC_number:
4311  if ((*it)->IsSetVal() &&
4312  ( ! ctx.Config().DropIllegalQuals() || s_IsLegalECNumber(val) ) ) {
4313  x_AddQual(slot, new CFlatStringQVal(val));
4314  }
4315  break;
4316  case eFQ_illegal_qual:
4317  if ( ctx.UsingSeqEntryIndex() && NStr::CompareNocase (name, "transl_except") == 0 ) {
4318  break;
4319  }
4320  x_AddQual(slot, new CFlatIllegalQVal(**it));
4321  break;
4322  case eFQ_product:
4323  if (!x_HasQual(eFQ_product)) {
4324  x_AddQual(slot, new CFlatStringQVal(val));
4325  } else {
4326  const CFlatStringQVal* gene = x_GetStringQual(eFQ_gene);
4327  const string& gene_val =
4328  gene ? gene->GetValue() : kEmptyStr;
4329  const CFlatStringQVal* product = x_GetStringQual(eFQ_product);
4330  const string& product_val =
4331  product ? product->GetValue() : kEmptyStr;
4332  if (val != gene_val && val != product_val) {
4333  if ( ! ctx.Config().CodonRecognizedToNote() ||
4335  NStr::Find(val, "RNA") == NPOS )
4336  {
4338  }
4339  }
4340  }
4341  break;
4342  case eFQ_compare:
4343  {{
4344  list<string> vals;
4345  s_ParseParentQual(**it, vals);
4346  ITERATE (list<string>, i, vals) {
4347  if (!ctx.Config().CheckQualSyntax() ||
4350  }
4351  }
4352  }}
4353  break;
4354  case eFQ_evidence:
4355  {{
4356  if ( val == "EXPERIMENTAL" ) {
4358  } else if ( val == "NOT_EXPERIMENTAL" ) {
4360  }
4361  }}
4362  break;
4363 
4364  case eFQ_rpt_unit_range:
4366  break;
4367 
4368  case eFQ_replace:
4369  {{
4370  string s(val);
4371  if (string::npos == s.find_first_not_of("ACGTUacgtu")) {
4372  NStr::ToLower(s);
4373  NStr::ReplaceInPlace(s, "u", "t");
4374  }
4375  replace_quals.push_back(s);
4376  }}
4377  break;
4378 
4379  case eFQ_operon:
4380  {{
4381  if( ! x_HasQual(eFQ_operon) ) {
4382  x_AddQual(slot, new CFlatStringQVal(val));
4383  }
4384  }}
4385  break;
4386 
4387  case eFQ_experiment:
4388  {{
4389  if( ctx.Config().DoHTML() && ! CommentHasSuspiciousHtml(val) ) {
4390  string new_val;
4391  s_HTMLizeExperimentQual(new_val, val);
4392  x_AddQual(slot, new CFlatStringQVal(new_val));
4393  } else {
4394  x_AddQual(slot, new CFlatStringQVal(val));
4395  }
4396  }}
4397  break;
4398 
4399  case eFQ_clone:
4401  break;
4402 
4403  case eFQ_pseudogene:
4404 
4405  // our pseudogene(s) override(s) any that existed before
4406  if( first_pseudogene ) {
4407  first_pseudogene = false;
4409  }
4410  x_AddQual(slot, new CFlatStringQVal(val));
4411 
4412  break;
4413 
4414  case eFQ_regulatory_class:
4415  x_AddRegulatoryClassQual(val, check_qual_syntax);
4416  break;
4417 
4419  x_AddRecombinationClassQual(val, check_qual_syntax);
4420  break;
4421 
4422  default:
4423  x_AddQual(slot, new CFlatStringQVal(val));
4424  break;
4425  }
4426  }
4427 
4428  if (replace_quals.size()) {
4429  std::sort(replace_quals.begin(), replace_quals.end());
4430  ITERATE (vector<string>, it, replace_quals) {
4432  }
4433  }
4434 
4435  // some "map-related" qual adjustments
4436  if( ctx.Config().HideSpecificGeneMaps() && ! x_HasQual(eFQ_map) ) {
4437  if( x_HasQual(eFQ_cyt_map) ) {
4438  x_AddQual(eFQ_map, x_GetQual(eFQ_cyt_map)->second );
4439  } else if( x_HasQual(eFQ_gen_map) ) {
4440  x_AddQual(eFQ_map, x_GetQual(eFQ_gen_map)->second );
4441  } else if( x_HasQual(eFQ_rad_map) ) {
4442  x_AddQual(eFQ_map, x_GetQual(eFQ_rad_map)->second );
4443  }
4447  }
4448 }
4449 
4450 // ----------------------------------------------------------------------------
4452  const string& rpt_unit )
4453 // ----------------------------------------------------------------------------
4454 {
4455  if (rpt_unit.empty()) {
4456  return;
4457  }
4458 
4459  vector<string> units;
4460 
4461  if (NStr::StartsWith(rpt_unit, '(') && NStr::EndsWith(rpt_unit, ')') &&
4462  NStr::Find(rpt_unit, "(", 1) == NPOS) {
4463  string tmp = rpt_unit.substr(1, rpt_unit.length() - 2);
4464  NStr::Split(tmp, ",", units, 0);
4465  } else {
4466  units.push_back(rpt_unit);
4467  }
4468 
4469  NON_CONST_ITERATE (vector<string>, it, units) {
4470  if (!it->empty()) {
4473  }
4474  }
4475 }
4476 
4477 
4478 // ----------------------------------------------------------------------------
4480  const string& rpt_type,
4481  bool check_qual_syntax )
4482 // ----------------------------------------------------------------------------
4483 {
4484  if (rpt_type.empty()) {
4485  return;
4486  }
4487 
4488  string value( rpt_type );
4490 
4491  vector<string> pieces;
4493 
4494  ITERATE( vector<string>, it, pieces ) {
4495  if ( ! check_qual_syntax || CGb_qual::IsValidRptTypeValue( *it ) ) {
4497  }
4498  }
4499 }
4500 
4501 
4502 static bool s_IsValidRegulatoryClass(const string& type)
4503 {
4504  vector<string> valid_types = CSeqFeatData::GetRegulatoryClassList();
4505 
4506  FOR_EACH_STRING_IN_VECTOR (itr, valid_types) {
4507  string str = *itr;
4508  if (NStr::Equal (str, type)) return true;
4509  }
4510 
4511  return false;
4512 }
4513 
4514 static bool s_IsValidRecombinationClass(const string& type)
4515 {
4516  vector<string> valid_types = CSeqFeatData::GetRecombinationClassList();
4517 
4518  FOR_EACH_STRING_IN_VECTOR (itr, valid_types) {
4519  string str = *itr;
4520  if (NStr::Equal (str, type)) return true;
4521  }
4522 
4523  return false;
4524 }
4525 
4526 // ----------------------------------------------------------------------------
4528  const string& recombination_class,
4529  bool check_qual_syntax
4530 )
4531 // ----------------------------------------------------------------------------
4532 {
4533  if (recombination_class.empty()) {
4534  return;
4535  }
4536 
4537  string recomb_class = recombination_class;
4538 
4539  if (NStr::StartsWith(recomb_class, "other:")) {
4540  NStr::TrimPrefixInPlace(recomb_class, "other:");
4541  NStr::TruncateSpacesInPlace(recomb_class);
4542  }
4543  if ( s_IsValidRecombinationClass( recomb_class ) ) {
4544  x_AddQual( eFQ_recombination_class, new CFlatStringQVal(recomb_class));
4545  } else {
4547  x_AddQual( eFQ_seqfeat_note, new CFlatStringQVal(recomb_class));
4548  }
4549 }
4550 
4551 
4552 // ----------------------------------------------------------------------------
4554  const string& regulatory_class,
4555  bool check_qual_syntax
4556 )
4557 // ----------------------------------------------------------------------------
4558 {
4559  if (regulatory_class.empty()) {
4560  return;
4561  }
4562 
4563  string reg_class = regulatory_class;
4564 
4565  if (NStr::StartsWith(reg_class, "other:")) {
4566  NStr::TrimPrefixInPlace(reg_class, "other:");
4567  NStr::TruncateSpacesInPlace(reg_class);
4568  }
4569  if ( s_IsValidRegulatoryClass( reg_class ) ) {
4571  } else if (NStr::CompareNocase(reg_class, "other") == 0 &&
4572  m_Feat.IsSetComment() && !m_Feat.GetComment().empty()) {
4574  } else {
4576  x_AddQual( eFQ_seqfeat_note, new CFlatStringQVal(reg_class));
4577  }
4578 }
4579 
4580 
4582 {
4583  const CFlatFileConfig& cfg = GetContext()->Config();
4584 
4585  if ( cfg.IsFormatFTable() ) {
4586  ff.SetQuals() = m_FTableQuals;
4587  return;
4588  }
4589 
4590  ff.SetQuals().reserve(m_Quals.Size());
4591  CFlatFeature::TQuals& qvec = ff.SetQuals();
4592 
4593 #define DO_QUAL(x) x_FormatQual(eFQ_##x, #x, qvec)
4594  DO_QUAL(ncRNA_class);
4595  DO_QUAL(regulatory_class);
4596  DO_QUAL(recombination_class);
4597 
4598  DO_QUAL(partial);
4599  DO_QUAL(gene);
4600 
4601  DO_QUAL(locus_tag);
4602  DO_QUAL(old_locus_tag);
4603 
4604  x_FormatQual(eFQ_gene_syn_refseq, "synonym", qvec);
4605  DO_QUAL(gene_syn);
4606 
4607  x_FormatQual(eFQ_gene_allele, "allele", qvec);
4608 
4609  DO_QUAL(operon);
4610 
4611  DO_QUAL(product);
4612 
4613  x_FormatQual(eFQ_prot_EC_number, "EC_number", qvec);
4614  x_FormatQual(eFQ_prot_activity, "function", qvec);
4615 
4616  DO_QUAL(standard_name);
4617  DO_QUAL(coded_by);
4618  DO_QUAL(derived_from);
4619 
4620  x_FormatQual(eFQ_prot_name, "name", qvec);
4621  DO_QUAL(region_name);
4622  DO_QUAL(bond_type);
4623  DO_QUAL(site_type);
4624  DO_QUAL(sec_str_type);
4625  DO_QUAL(heterogen);
4626  DO_QUAL(non_std_residue);
4627 
4628  DO_QUAL(tag_peptide);
4629 
4630  DO_QUAL(evidence);
4631  DO_QUAL(experiment);
4632  DO_QUAL(inference);
4633  DO_QUAL(exception);
4634  DO_QUAL(ribosomal_slippage);
4635  DO_QUAL(trans_splicing);
4636  DO_QUAL(circular_RNA);
4637  DO_QUAL(artificial_location);
4638 
4639  if ( !cfg.GoQualsToNote() ) {
4640  if( cfg.GoQualsEachMerge() ) {
4641  // combine all quals of a given type onto the same qual
4642  x_FormatGOQualCombined(eFQ_go_component, "GO_component", qvec);
4643  x_FormatGOQualCombined(eFQ_go_function, "GO_function", qvec);
4644  x_FormatGOQualCombined(eFQ_go_process, "GO_process", qvec);
4645  } else {
4646  x_FormatQual(eFQ_go_component, "GO_component", qvec);
4647  x_FormatQual(eFQ_go_function, "GO_function", qvec);
4648  x_FormatQual(eFQ_go_process, "GO_process", qvec);
4649  }
4650  }
4651 
4652  DO_QUAL(nomenclature);
4653 
4654  x_FormatNoteQuals(ff);
4655  DO_QUAL(citation);
4656 
4657  DO_QUAL(number);
4658 
4659  DO_QUAL(pseudo);
4660  DO_QUAL(pseudogene);
4661  DO_QUAL(selenocysteine);
4662  DO_QUAL(pyrrolysine);
4663 
4664  DO_QUAL(codon_start);
4665 
4666  DO_QUAL(anticodon);
4667  if ( ! cfg.CodonRecognizedToNote() ) {
4668  DO_QUAL(trna_codons);
4669  }
4670  DO_QUAL(bound_moiety);
4671  DO_QUAL(clone);
4672  DO_QUAL(compare);
4673  // DO_QUAL(cons_splice);
4674  DO_QUAL(direction);
4675  DO_QUAL(function);
4676  DO_QUAL(frequency);
4677  DO_QUAL(EC_number);
4678  x_FormatQual(eFQ_gene_map, "map", qvec);
4679  // In certain modes, cyt_map, gen_map, and rad_map are
4680  // moved to eFQ_gene_map by x_ImportQuals:
4681  DO_QUAL(cyt_map);
4682  DO_QUAL(gen_map);
4683  DO_QUAL(rad_map);
4684  DO_QUAL(estimated_length);
4685  DO_QUAL(gap_type);
4686  DO_QUAL(linkage_evidence);
4687  DO_QUAL(allele);
4688  DO_QUAL(map);
4689  DO_QUAL(mod_base);
4690  DO_QUAL(PCR_conditions);
4691  DO_QUAL(phenotype);
4692  DO_QUAL(rpt_family);
4693  DO_QUAL(rpt_type);
4694  DO_QUAL(rpt_unit);
4695  DO_QUAL(rpt_unit_range);
4696  DO_QUAL(rpt_unit_seq);
4697  DO_QUAL(satellite);
4698  DO_QUAL(mobile_element);
4699  DO_QUAL(mobile_element_type);
4700  DO_QUAL(usedin);
4701 
4702  // extra imports, actually...
4703  x_FormatQual(eFQ_illegal_qual, "illegal", qvec);
4704 
4705  DO_QUAL(replace);
4706 
4707  DO_QUAL(transl_except);
4708  DO_QUAL(transl_table);
4709  DO_QUAL(codon);
4710  DO_QUAL(organism);
4711  DO_QUAL(label);
4712  x_FormatQual(eFQ_cds_product, "product", qvec);
4713  DO_QUAL(UniProtKB_evidence);
4714  DO_QUAL(protein_id);
4715  DO_QUAL(transcript_id);
4716  DO_QUAL(db_xref);
4717  x_FormatQual(eFQ_gene_xref, "db_xref", qvec);
4718  DO_QUAL(mol_wt);
4719  DO_QUAL(calculated_mol_wt);
4720  DO_QUAL(translation);
4721  DO_QUAL(transcription);
4722  DO_QUAL(peptide);
4723 
4724 #undef DO_QUAL
4725 }
4726 
4727 /*
4728 // check if str2 is a sub string of str1
4729 static bool s_IsRedundant(const string& str1, const string& str2)
4730 {
4731  size_t pos = NPOS;
4732  bool whole = false;
4733  for (pos = NStr::Find(str1, str2); pos != NPOS && !whole; pos += str2.length()) {
4734  whole = IsWholeWord(str1, pos);
4735  }
4736  return (pos != NPOS && whole);
4737 }
4738 
4739 
4740 // Remove redundant elements that occur twice or as part of other elements.
4741 static void s_PruneNoteQuals(CFlatFeature::TQuals& qvec)
4742 {
4743  if (qvec.empty()) {
4744  return;
4745  }
4746  CFlatFeature::TQuals::iterator it1 = qvec.begin();
4747  while (it1 != qvec.end()) {
4748  CFlatFeature::TQuals::iterator it2 = it1 + 1;
4749  const string& val1 = (*it1)->GetValue();
4750  while (it2 != qvec.end()) {
4751  const string& val2 = (*it2)->GetValue();
4752  if (s_IsRedundant(val1, val2)) {
4753  it2 = qvec.erase(it2);
4754  } else if (s_IsRedundant(val2, val1)) {
4755  break;
4756  } else {
4757  ++it2;
4758  }
4759  }
4760  if (it2 != qvec.end()) {
4761  it1 = qvec.erase(it1);
4762  } else {
4763  ++it1;
4764  }
4765  }
4766 }
4767 */
4768 
4770 {
      // Assembles the note text for this feature: collects the note-type
      // qualifier slots listed below into qvec, turns them into a single
      // string with s_QualVectorToNote(), and passes the result to
      // s_NoteFinalize() together with ff.
4771  const CFlatFileConfig& cfg = GetContext()->Config();
4772  CFlatFeature::TQuals qvec;
4773 
      // Both macros call x_FormatNoteQual() for slot eFQ_<x>, using the name
      // returned by GetStringOfFeatQual() for that slot; the second macro
      // also passes IFlatQVal::fPrependNewline as the flags argument.
4774 #define DO_NOTE(x) x_FormatNoteQual(eFQ_##x, GetStringOfFeatQual(eFQ_##x), qvec)
4775 #define DO_NOTE_PREPEND_NEWLINE(x) x_FormatNoteQual(eFQ_##x, GetStringOfFeatQual(eFQ_##x), qvec, IFlatQVal::fPrependNewline )
4776  DO_NOTE(transcript_id_note);
4777  DO_NOTE(gene_desc);
4778 
      // trna_codons is routed into the note only when the configuration
      // requests it.
4779  if ( cfg.CodonRecognizedToNote() ) {
4780  DO_NOTE(trna_codons);
4781  }
4782  DO_NOTE(encodes);
4783  DO_NOTE(prot_desc);
4784  DO_NOTE(prot_note);
4785  DO_NOTE(prot_comment);
4786  DO_NOTE(prot_method);
4787  DO_NOTE(maploc);
4788  DO_NOTE(prot_conflict);
4789  DO_NOTE(prot_missing);
4790  DO_NOTE(seqfeat_note);
4791  DO_NOTE(region);
4792 // DO_NOTE(selenocysteine_note);
4793  DO_NOTE(prot_names);
4794  DO_NOTE(bond);
4795  DO_NOTE(site);
4796 // DO_NOTE(rrna_its);
4797  DO_NOTE(xtra_prod_quals);
4798 // DO_NOTE(inference_bad);
4799  DO_NOTE(modelev);
4800 // DO_NOTE(cdd_definition);
4801 // DO_NOTE(tag_peptide);
4802  DO_NOTE_PREPEND_NEWLINE(exception_note);
4803 
4804  string notestr;
4805  string suffix;
4806 // bool add_period = false;
4807  bool add_period = true/*fl*/;
4808 
      // NOTE(review): s_QualVectorToNote() is defined elsewhere; presumably
      // it appends the formatted quals to notestr and may update suffix and
      // add_period -- confirm against its definition.
4809  s_QualVectorToNote(qvec, true, notestr, suffix, add_period);
4810 
      // GO terms are added to the note only when GoQualsToNote() is set.
      // qvec is cleared first so that only the GO quals go through the second
      // s_QualVectorToNote() call, which passes false where the first call
      // passed true.
4811  if (GetContext()->Config().GoQualsToNote()) {
4812  qvec.clear();
4813  DO_NOTE(go_component);
4814  DO_NOTE(go_function);
4815  DO_NOTE(go_process);
4816  s_QualVectorToNote(qvec, false, notestr, suffix, add_period);
4817  }
4818  s_NoteFinalize(add_period, notestr, ff, eTilde_tilde);
4819 
      // The helper macros are local to this function.
4820 #undef DO_NOTE
4821 #undef DO_NOTE_PREPEND_NEWLINE
4822 }
4823 
4825 (EFeatureQualifier slot,
4826  const char* name,
4827  CFlatFeature::TQuals& qvec,
4828  IFlatQVal::TFlags flags) const
4829 {
      // Formats every qualifier value stored under `slot`.
      //   slot  - key to look up in m_Quals
      //   name  - qualifier name handed to each value's Format()
      //   qvec  - output vector handed to each value's Format()
      //   flags - formatting flags forwarded unchanged
      // Iteration starts at LowerBound(slot) and stops at the first entry
      // whose key differs from slot, so nothing happens when the slot is
      // not present.
4830  TQCI it = m_Quals.LowerBound(slot);
4831  TQCI end = m_Quals.end();
4832  while (it != end && it->first == slot) {
4833  it->second->Format(qvec, name, *GetContext(), flags);
4834  ++it;
4835  }
4836 }
4837 
4838 
4840 (EFeatureQualifier slot,
4841  const CTempString & name,
4842  CFlatFeature::TQuals& qvec,
4843  IFlatQVal::TFlags flags) const
4844 {
4846 
      // Walks all entries of m_Quals whose key equals `slot`, starting at
      // LowerBound(slot), and lets each value format itself into qvec under
      // the given name with the given flags. Does nothing when the slot is
      // absent.
4847  TQCI it = m_Quals.LowerBound(slot);
4848  TQCI end = m_Quals.end();
4849  while (it != end && it->first == slot) {
4850  it->second->Format(qvec, name, *GetContext(), flags);
4851  ++it;
4852  }
4853 }
4854 
4855 // This produces one qual out of all the GO quals of the given slot, with their
4856 // values concatenated.
4858 (EFeatureQualifier slot,
4859  const CTempString & name,
4860  CFlatFeature::TQuals& qvec,
4861  TQualFlags flags) const
4862 {
4863  // copy all the given quals with that name since we need to sort them
4864  vector<CConstRef<CFlatGoQVal> > goQuals;
4865 
4866  TQCI it = m_Quals.LowerBound(slot);
4867  TQCI end = m_Quals.end();
4868  while (it != end && it->first == slot) {
4869  goQuals.push_back( CConstRef<CFlatGoQVal>( dynamic_cast<const CFlatGoQVal*>( it->second.GetNonNullPointer() ) ) );
4870  ++it;
4871  }
4872 
4873  if( goQuals.empty() ) {
4874  return;
4875  }
4876 
4877  stable_sort( goQuals.begin(), goQuals.end(), CGoQualLessThan() );
4878 
4879  CFlatFeature::TQuals temp_qvec;
4880 
4881  string combined;
4882 
4883 
4884  string::size_type this_part_beginning_text_string_pos = 0;
4885 
4886  // now concatenate their values into the variable "combined"
4887  const string* pLastQualTextString = nullptr;
4888  ITERATE( vector<CConstRef<CFlatGoQVal> >, iter, goQuals ) {
4889 
4890  // Use thisQualTextString to tell when we have consecutive quals with the
4891  // same text string.
4892  const string *pThisQualTextString = &(*iter)->GetTextString();
4893  if (! pThisQualTextString) {
4894  continue;
4895  }
4896 
4897  (*iter)->Format(temp_qvec, name, *GetContext(), flags);
4898 
4899  if(! pLastQualTextString || ! NStr::EqualNocase(*pLastQualTextString, *pThisQualTextString)) {
4900  // normal case: each CFlatGoQVal has its own part
4901  if( ! combined.empty() ) {
4902  combined += "; ";
4903  this_part_beginning_text_string_pos = combined.length() - 1;
4904  }
4905  combined += temp_qvec.back()->GetValue();
4906  } else {
4907  // consecutive CFlatGoQVal with the same text string: merge
4908  // (chop off the part up to and including the text string )
4909  const string & new_value = temp_qvec.back()->GetValue();
4910 
4911  // let text_string_pos point to the part *after* the text string
4912  SIZE_TYPE post_text_string_pos = NStr::FindNoCase( new_value, *pLastQualTextString );
4913  _ASSERT( post_text_string_pos != NPOS );
4914  post_text_string_pos += pLastQualTextString->length();
4915 
4916  // append the new part after the text string, but only
4917  // if it's not a duplicate
4918  string str_to_append = new_value.substr( post_text_string_pos,
4919  (pLastQualTextString->length() - post_text_string_pos) );
4920  if( NStr::Find(combined, str_to_append, this_part_beginning_text_string_pos) == NPOS ) {
4921  combined.append( str_to_append );
4922  }
4923  }
4924 
4925  pLastQualTextString = pThisQualTextString;
4926  }
4927  pLastQualTextString = nullptr; // just to make sure we don't accidentally use it
4928 
4929  // add the final merged CFormatQual
4930  if( ! combined.empty() ) {
4931  const string prefix = " ";
4932  const string suffix = ";";
4933  TFlatQual res(new CFormatQual(name, combined, prefix, suffix, CFormatQual::eQuoted ));
4934  qvec.push_back(res);
4935  }
4936 }
4937 
4939 {
      // Returns the value stored under `slot` as a CFlatStringQVal.
      // The result is null when x_HasQual(slot) is false, or when the stored
      // value is not a CFlatStringQVal (the dynamic_cast then yields null).
4940  const IFlatQVal* qual = nullptr;
4941  if ( x_HasQual(slot) ) {
4942  qual = m_Quals.Find(slot)->second;
4943  }
4944  return dynamic_cast<const CFlatStringQVal*>(qual);
4945 }
4946 
4947 
4949 {
      // Returns a mutable pointer to the value stored under `slot`, viewed as
      // a CFlatStringListQVal. Null when the slot is absent or the stored
      // value has a different dynamic type.
4950  IFlatQVal* qual = nullptr;
4951  if (x_HasQual(slot)) {
      // const_cast strips the constness of the stored value so that the
      // caller receives a non-const pointer.
4952  qual = const_cast<IFlatQVal*>(&*m_Quals.Find(slot)->second);
4953  }
4954  return dynamic_cast<CFlatStringListQVal*>(qual);
4955 }
4956 
4958 {
      // Returns a mutable pointer to the value stored under `slot`, viewed as
      // a CFlatProductNamesQVal. Null when the slot is absent or the stored
      // value has a different dynamic type.
4959  IFlatQVal* qual = nullptr;
4960  if (x_HasQual(slot)) {
      // const_cast strips the constness of the stored value so that the
      // caller receives a non-const pointer.
4961  qual = const_cast<IFlatQVal*>(&*m_Quals.Find(slot)->second);
4962  }
4963  return dynamic_cast<CFlatProductNamesQVal*>(qual);
4964 }
4965 
4966 // maps each valid mobile_element_type prefix to whether it
4967 // must have more info after the prefix
      // Each entry pairs a prefix with a flag; per the comment preceding this
      // table the flag says whether more info must follow the prefix. Only
      // "other" has the flag set.
      // NOTE(review): the keys are listed in ascending byte order (upper-case
      // before lower-case); presumably the static map built from this table
      // relies on that -- confirm before inserting new entries.
4970  { "LINE", false },
4971  { "MITE", false },
4972  { "SINE", false },
4973  { "insertion sequence", false },
4974  { "integron", false },
4975  { "non-LTR retrotransposon", false },
4976  { "other", true },
4977  { "retrotransposon", false },
4978  { "transposon", false }
4979 };
4980 
4983 
4984 // returns whether or not it's valid
     // A value is accepted when all of the following hold:
     //  - it is not empty;
     //  - the text before the first ':' (the whole value if there is no
     //    colon) is found as a key in sm_MobileElemTypeKeys;
     //  - if that key's flag is set, the value contains a colon.
     // Only the presence of the colon is checked; the text after it is not
     // examined.
4985 bool s_ValidateMobileElementType( const string & mobile_element_type_value )
4986 {
4987  if( mobile_element_type_value.empty() ) {
4988  return false;
4989  }
4990 
4991  // if there's a colon, we ignore the part after the colon for testing purposes
4992  string::size_type colon_pos = mobile_element_type_value.find( ':' );
4993 
4994  const string value_before_colon = ( string::npos == colon_pos
4995  ? mobile_element_type_value
4996  : mobile_element_type_value.substr( 0, colon_pos ) );
4997 
      // Look the prefix up in the table of known mobile element types.
4999  sm_MobileElemTypeKeys.find( value_before_colon.c_str() );
5000  if( prefix_info == sm_MobileElemTypeKeys.end() ) {
5001  return false; // prefix not found
5002  }
5003 
5004  // check if info required after prefix (colon plus info, actually)
5005  if( prefix_info->second ) {
5006  if( string::npos == colon_pos ) {
5007  return false; // no additional info supplied, even though required
5008  }
5009  }
5010 
5011  // all tests passed
5012  return true;
5013 }
5014 
5016 {
      // Unary predicate that reports whether its argument occurs inside a
      // fixed comparison string.
5017 public:
      // Stores a reference to comparisonString rather than a copy, so that
      // string must remain alive for as long as the predicate is used.
5018  explicit CInStringPred( const string &comparisonString )
5019  : m_ComparisonString( comparisonString )
5020  {}
5021 
      // True when NStr::Find locates arg within the comparison string.
5022  bool operator()( const string &arg ) {
5023  return NStr::Find( m_ComparisonString, arg ) != NPOS;
5024  }
5025 private:
5026  const string &m_ComparisonString; // not owned
5027 };
5028 
5030  const CGene_ref* gene_ref )
5031 {
5032  const TGeneSyn* gene_syn =
5033  (gene_ref && gene_ref->IsSetSyn() && !gene_ref->GetSyn().empty() )
5034  ?
5035  &gene_ref->GetSyn()
5036  :
5037  nullptr;
5038  const CBioseqContext& ctx = *GetContext();
5039 
5040  if (ctx.Config().DropIllegalQuals()) {
5042  }
5043 
5045  const CFlatStringQVal* gene = x_GetStringQual(eFQ_gene);
5046  const CFlatStringQVal* prot_desc = x_GetStringQual(eFQ_prot_desc);
5047  const CFlatStringQVal* standard_name = x_GetStringQual(eFQ_standard_name);
5048  const CFlatStringQVal* seqfeat_note = x_GetStringQual(eFQ_seqfeat_note);
5049 
5050  if (gene) {
5051  const string& gene_name = gene->GetValue();
5052 
5053  // /gene same as feature.comment will suppress /note
5054  if (m_Feat.IsSetComment()) {
5055  if (NStr::Equal(gene_name, m_Feat.GetComment())) {
5057  seqfeat_note = nullptr;
5058  }
5059  }
5060 
5061  // remove protein description that equals the gene name, case sensitive
5062  if (prot_desc) {
5063  if (s_StrEqualDisregardFinalPeriod(gene_name, prot_desc->GetValue(), NStr::eCase)) {
5065  prot_desc = nullptr;
5066  }
5067  }
5068 
5069  // remove prot name if equals gene
5070  if (prot_names) {
5071 
5072  CProt_ref::TName::iterator remove_start = prot_names->SetValue().begin();
5073  ++remove_start; // The "++" is because the first one shouldn't be erased since it's used for the product
5074  CProt_ref::TName::iterator new_end =
5075  remove( remove_start, prot_names->SetValue().end(), gene_name );
5076  prot_names->SetValue().erase( new_end, prot_names->SetValue().end() );
5077 
5078  if (prot_names->GetValue().empty()) {
5080